author     Pirama Arumuga Nainar <pirama@google.com>    2015-05-06 11:46:36 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>    2015-05-18 10:52:30 -0700
commit     2c3e0051c31c3f5b2328b447eadf1cf9c4427442 (patch)
tree       c0104029af14e9f47c2ef58ca60e6137691f3c9b /lib
parent     e1bc145815f4334641be19f1c45ecf85d25b6e5a (diff)
Update aosp/master LLVM for rebase to r235153
Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7
(cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987)
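The headline addition in this rebase is the new DivergenceAnalysis pass under lib/Analysis (the full new file appears further below in the diff). As a quick orientation — a minimal sketch, not part of this commit, and assuming the factory declaration lands in llvm/Analysis/Passes.h via the include/ changes that this lib-only view omits — a client would schedule it like any other legacy function pass:

    #include "llvm/Analysis/Passes.h"        // assumed to declare createDivergenceAnalysisPass()
    #include "llvm/IR/LegacyPassManager.h"
    using namespace llvm;

    // Schedule the new analysis; on targets without branch divergence the
    // pass exits early (see runOnFunction in the new file below), so adding
    // it unconditionally is safe.
    void addDivergenceAnalysis(legacy::FunctionPassManager &FPM) {
      FPM.add(createDivergenceAnalysisPass());
    }

Since the pass registers itself under the name "divergence", its print() output (the DIVERGENT: lines) should also be reachable with opt -divergence -analyze.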
Diffstat (limited to 'lib')
563 files changed, 17617 insertions, 8940 deletions
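The first file in the diff, AliasAnalysis.cpp, grows a getModRefInfo overload that compares an arbitrary instruction against a call site, dispatching to the call-vs-call query when the instruction is itself a call. A hypothetical caller, sketched for orientation only (not code from this commit):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Ask whether Call and I can touch the same memory, via the new
    // Instruction-vs-ImmutableCallSite overload added below. I is expected
    // to be a call/invoke or a memory-accessing instruction; in the non-call
    // case the overload checks Call against the location I defines.
    static bool mayInterfere(AliasAnalysis &AA, Instruction *I, CallInst *Call) {
      return AA.getModRefInfo(I, ImmutableCallSite(Call)) !=
             AliasAnalysis::NoModRef;
    }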
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 0b0fd50..43db176 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) { AA->addEscapingUse(U); } +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { + // We may have two calls + if (auto CS = ImmutableCallSite(I)) { + // Check if the two calls modify the same memory + return getModRefInfo(Call, CS); + } else { + // Otherwise, check if the call modifies or references the + // location this memory access defines. The best we can say + // is that if the call references what this instruction + // defines, it must be clobbered by this location. + const AliasAnalysis::Location DefLoc = AA->getLocation(I); + if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) + return AliasAnalysis::ModRef; + } + return AliasAnalysis::NoModRef; +} AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(ImmutableCallSite CS, @@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (!alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(getLocation(L), Loc)) return NoModRef; // Otherwise, a load just reads. @@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (!S->isUnordered()) return ModRef; - // If the store address cannot alias the pointer in question, then the - // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) - return NoModRef; + if (Loc.Ptr) { + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(getLocation(S), Loc)) + return NoModRef; - // If the pointer is a pointer to constant memory, then it could not have been - // modified by this store. - if (pointsToConstantMemory(Loc)) - return NoModRef; + // If the pointer is a pointer to constant memory, then it could not have + // been modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + } // Otherwise, a store just writes. return Mod; diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 5865259..a1bfba1 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -44,7 +44,7 @@ namespace { errs() << " " << Val << " " << Desc << " responses (" << Val*100/Sum << "%)\n"; } - ~AliasAnalysisCounter() { + ~AliasAnalysisCounter() override { unsigned AASum = No+May+Partial+Must; unsigned MRSum = NoMR+JustRef+JustMod+MR; if (AASum + MRSum) { // Print a report if any counted queries occurred... diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index fe4bd4c..273eacc 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) { if (EvalAAMD && isa<StoreInst>(&*I)) Stores.insert(&*I); Instruction &Inst = *I; - if (CallSite CS = cast<Value>(&Inst)) { + if (auto CS = CallSite(&Inst)) { Value *Callee = CS.getCalledValue(); // Skip actual functions for direct function calls. 
if (!isa<Function>(Callee) && isInterestingPointer(Callee)) diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 45442b0..4c79b9f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -187,7 +187,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - CallSite C1 = getUnknownInst(i), C2 = Inst; + CallSite C1(getUnknownInst(i)), C2(Inst); if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 4549c1e..842ff0a 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFLAliasAnalysisPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDelinearizationPass(Registry); + initializeDivergenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 277956c..94a1d2e 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -22,6 +22,7 @@ analysis_SRC_FILES := \ CostModel.cpp \ Delinearization.cpp \ DependenceAnalysis.cpp \ + DivergenceAnalysis.cpp \ DomPrinter.cpp \ DominanceFrontier.cpp \ IVUsers.cpp \ diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index be2282f..2767e41 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -932,14 +932,14 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size, // Also, check that they all index through arrays. for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) { if (!isa<ArrayType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices))) + GEP1->getSourceElementType(), IntermediateIndices))) return AliasAnalysis::MayAlias; IntermediateIndices.push_back(GEP1->getOperand(i + 1)); } StructType *LastIndexedStruct = dyn_cast<StructType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices)); + GEP1->getSourceElementType(), IntermediateIndices)); if (!LastIndexedStruct) return AliasAnalysis::MayAlias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 37f2fae..3d819eb 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { std::string Result; raw_string_ostream OS(Result); - OS << Node->getName().str() << ":"; + OS << Node->getName() << ":"; switch (ViewBlockFreqPropagationDAG) { case GVDT_Fraction: Graph->printBlockFreq(OS, Node); diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 278073c..456cee1 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( return true; } -/// \brief Get the maximum allowed loop scale. -/// -/// Gives the maximum number of estimated iterations allowed for a loop. Very -/// large numbers cause problems downstream (even within 64-bits). -static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); } - /// \brief Compute the loop scale for a loop. 
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + // Infinite loops need special handling. If we give the back edge an infinite + // mass, they may saturate all the other scales in the function down to 1, + // making all the other region temperatures look exactly the same. Choose an + // arbitrary scale to avoid these issues. + // + // FIXME: An alternate way would be to select a symbolic scale which is later + // replaced to be the maximum of all computed scales plus 1. This would + // appropriately describe the loop as having a large scale, without skewing + // the final frequency computation. + const Scaled64 InifiniteLoopScale(1, 12); + // LoopScale == 1 / ExitMass // ExitMass == HeadMass - BackedgeMass BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; - // Block scale stores the inverse of the scale. - Loop.Scale = ExitMass.toScaled().inverse(); + // Block scale stores the inverse of the scale. If this is an infinite loop, + // its exit mass will be zero. In this case, use an arbitrary scale for the + // loop scale. + Loop.Scale = + ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse(); DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() << " - " << Loop.BackedgeMass << ")\n" << " - scale = " << Loop.Scale << "\n"); - - if (Loop.Scale > getMaxLoopScale()) { - Loop.Scale = getMaxLoopScale(); - DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); - } } /// \brief Package up a loop. @@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max) { // Scale the Factor to a size that creates integers. Ideally, integers would // be scaled so that Max == UINT64_MAX so that they can be best - // differentiated. However, the register allocator currently deals poorly - // with large numbers. Instead, push Min up a little from 1 to give some - // room to differentiate small, unequal numbers. - // - // TODO: fix issues downstream so that ScalingFactor can be - // Scaled64(1,64)/Max. - Scaled64 ScalingFactor = Min.inverse(); - if ((Max / Min).lg() < 60) + // differentiated. However, in the presence of large frequency values, small + // frequencies are scaled down to 1, making it impossible to differentiate + // small, unequal numbers. When the spread between Min and Max frequencies + // fits well within MaxBits, we make the scale be at least 8. + const unsigned MaxBits = 64; + const unsigned SpreadBits = (Max / Min).lg(); + Scaled64 ScalingFactor; + if (SpreadBits <= MaxBits - 3) { + // If the values are small enough, make the scaling factor at least 8 to + // allow distinguishing small values. + ScalingFactor = Min.inverse(); ScalingFactor <<= 3; + } else { + // If the values need more than MaxBits to be represented, saturate small + // frequency values down to 1 by using a scaling factor that benefits large + // frequency values. + ScalingFactor = Scaled64(1, MaxBits) / Max; + } // Translate the floats to integers. 
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 14800f4..8799a71 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -379,6 +379,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!CV) return false; + // If the LHS is the result of AND'ing a value with a single bit bitmask, + // we don't have information about probabilities. + if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) + if (LHS->getOpcode() == Instruction::And) + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) + if (AndRHS->getUniqueInteger().isPowerOf2()) + return false; + bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { @@ -499,25 +507,23 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), - E = po_end(&F.getEntryBlock()); - I != E; ++I) { - DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); - if (calcUnreachableHeuristics(*I)) + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) continue; - if (calcMetadataWeights(*I)) + if (calcMetadataWeights(BB)) continue; - if (calcColdCallHeuristics(*I)) + if (calcColdCallHeuristics(BB)) continue; - if (calcLoopBranchHeuristics(*I)) + if (calcLoopBranchHeuristics(BB)) continue; - if (calcPointerHeuristics(*I)) + if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(*I)) + if (calcZeroHeuristics(BB)) continue; - if (calcFloatingPointHeuristics(*I)) + if (calcFloatingPointHeuristics(BB)) continue; - calcInvokeHeuristics(*I); + calcInvokeHeuristics(BB); } PostDominatedByUnreachable.clear(); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 89787f82..c86f1f5 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; @@ -111,7 +111,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." 
+ F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp index 53d748d..3147992 100644 --- a/lib/Analysis/CFLAliasAnalysis.cpp +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -161,7 +161,7 @@ struct FunctionHandle : public CallbackVH { assert(CFLAA != nullptr); } - virtual ~FunctionHandle() {} + ~FunctionHandle() override {} void deleted() override { removeSelfFromCache(); } void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } @@ -189,7 +189,7 @@ public: initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual ~CFLAliasAnalysis() {} + ~CFLAliasAnalysis() override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ae40321..1335a6d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMAnalysis ConstantFolding.cpp Delinearization.cpp DependenceAnalysis.cpp + DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 995465d..a85e813 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -671,8 +671,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. -static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, - const DataLayout &DL, +static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntPtrTy = DL.getIntPtrType(ResultTy); @@ -681,8 +681,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( - Ops[0]->getType(), - Ops.slice(1, i - 1)))) && + cast<PointerType>(Ops[0]->getType()->getScalarType()) + ->getElementType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -697,7 +698,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; @@ -723,7 +724,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { } /// If we can symbolically evaluate the GEP constant expression, do so. -static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, +static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; @@ -865,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return nullptr; // Create a GEP. 
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -1085,13 +1086,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); - case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI)) + case Instruction::GetElementPtr: { + Type *SrcTy = nullptr; + if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI)) + if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI)) return C; - return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1)); + } } } diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp new file mode 100644 index 0000000..e5ee295 --- /dev/null +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -0,0 +1,337 @@ +//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines divergence analysis which determines whether a branch in a +// GPU program is divergent. It can help branch optimizations such as jump +// threading and loop unswitching to make better decisions. +// +// GPU programs typically use the SIMD execution model, where multiple threads +// in the same execution group have to execute in lock-step. Therefore, if the +// code contains divergent branches (i.e., threads in a group do not agree on +// which path of the branch to take), the group of threads has to execute all +// the paths from that branch with different subsets of threads enabled until +// they converge at the immediately post-dominating BB of the paths. +// +// Due to this execution model, some optimizations such as jump +// threading and loop unswitching can be unfortunately harmful when performed on +// divergent branches. Therefore, an analysis that computes which branches in a +// GPU program are divergent can help the compiler to selectively run these +// optimizations. +// +// This file defines divergence analysis which computes a conservative but +// non-trivial approximation of all divergent branches in a GPU program. It +// partially implements the approach described in +// +// Divergence Analysis +// Sampaio, Souza, Collange, Pereira +// TOPLAS '13 +// +// The divergence analysis identifies the sources of divergence (e.g., special +// variables that hold the thread ID), and recursively marks variables that are +// data or sync dependent on a source of divergence as divergent. +// +// While data dependency is a well-known concept, the notion of sync dependency +// is worth more explanation. Sync dependence characterizes the control flow +// aspect of the propagation of branch divergence. For example, +// +// %cond = icmp slt i32 %tid, 10 +// br i1 %cond, label %then, label %else +// then: +// br label %merge +// else: +// br label %merge +// merge: +// %a = phi i32 [ 0, %then ], [ 1, %else ] +// +// Suppose %tid holds the thread ID. 
Although %a is not data dependent on %tid +// because %tid is not on its use-def chains, %a is sync dependent on %tid +// because the branch "br i1 %cond" depends on %tid and affects which value %a +// is assigned to. +// +// The current implementation has the following limitations: +// 1. intra-procedural. It conservatively considers the arguments of a +// non-kernel-entry function and the return value of a function call as +// divergent. +// 2. memory as black box. It conservatively considers values loaded from +// generic or local address as divergent. This can be improved by leveraging +// pointer analysis. +//===----------------------------------------------------------------------===// + +#include <vector> +#include "llvm/IR/Dominators.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "divergence" + +namespace { +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.setPreservesAll(); + } + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet<const Value *> DivergentValues; +}; +} // End of anonymous namespace + +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + +namespace { + +class DivergencePropagator { +public: + DivergencePropagator(Function &F, TargetTransformInfo &TTI, + DominatorTree &DT, PostDominatorTree &PDT, + DenseSet<const Value *> &DV) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + void populateWithSourcesOfDivergence(); + void propagate(); + +private: + // A helper function that explores data dependents of V. + void exploreDataDependency(Value *V); + // A helper function that explores sync dependents of TI. + void exploreSyncDependency(TerminatorInst *TI); + // Computes the influence region from Start to End. This region includes all + // basic blocks on any path from Start to End. + void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion); + // Finds all users of I that are outside the influence region, and add these + // users to Worklist. 
+ void findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); + + Function &F; + TargetTransformInfo &TTI; + DominatorTree &DT; + PostDominatorTree &PDT; + std::vector<Value *> Worklist; // Stack for DFS. + DenseSet<const Value *> &DV; // Stores all divergent values. +}; + +void DivergencePropagator::populateWithSourcesOfDivergence() { + Worklist.clear(); + DV.clear(); + for (auto &I : inst_range(F)) { + if (TTI.isSourceOfDivergence(&I)) { + Worklist.push_back(&I); + DV.insert(&I); + } + } + for (auto &Arg : F.args()) { + if (TTI.isSourceOfDivergence(&Arg)) { + Worklist.push_back(&Arg); + DV.insert(&Arg); + } + } +} + +void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { + // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's + // immediate post dominator are divergent. This rule handles if-then-else + // patterns. For example, + // + // if (tid < 5) + // a1 = 1; + // else + // a2 = 2; + // a = phi(a1, a2); // sync dependent on (tid < 5) + BasicBlock *ThisBB = TI->getParent(); + BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock(); + if (IPostDom == nullptr) + return; + + for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { + // A PHINode is uniform if it returns the same value no matter which path is + // taken. + if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second) + Worklist.push_back(I); + } + + // Propagation rule 2: if a value defined in a loop is used outside, the user + // is sync dependent on the condition of the loop exits that dominate the + // user. For example, + // + // int i = 0; + // do { + // i++; + // if (foo(i)) ... // uniform + // } while (i < tid); + // if (bar(i)) ... // divergent + // + // A program may contain unstructured loops. Therefore, we cannot leverage + // LoopInfo, which only recognizes natural loops. + // + // The algorithm used here handles both natural and unstructured loops. Given + // a branch TI, we first compute its influence region, the union of all simple + // paths from TI to its immediate post dominator (IPostDom). Then, we search + // for all the values defined in the influence region but used outside. All + // these users are sync dependent on TI. + DenseSet<BasicBlock *> InfluenceRegion; + computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); + // An insight that can speed up the search process is that all the in-region + // values that are used outside must dominate TI. Therefore, instead of + // searching every basic blocks in the influence region, we search all the + // dominators of TI until it is outside the influence region. 
+ BasicBlock *InfluencedBB = ThisBB; + while (InfluenceRegion.count(InfluencedBB)) { + for (auto &I : *InfluencedBB) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); + if (IDomNode == nullptr) + break; + InfluencedBB = IDomNode->getBlock(); + } +} + +void DivergencePropagator::findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { + for (User *U : I.users()) { + Instruction *UserInst = cast<Instruction>(U); + if (!InfluenceRegion.count(UserInst->getParent())) { + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } + } +} + +void DivergencePropagator::computeInfluenceRegion( + BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion) { + assert(PDT.properlyDominates(End, Start) && + "End does not properly dominate Start"); + std::vector<BasicBlock *> InfluenceStack; + InfluenceStack.push_back(Start); + InfluenceRegion.insert(Start); + while (!InfluenceStack.empty()) { + BasicBlock *BB = InfluenceStack.back(); + InfluenceStack.pop_back(); + for (BasicBlock *Succ : successors(BB)) { + if (End != Succ && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } + } +} + +void DivergencePropagator::exploreDataDependency(Value *V) { + // Follow def-use chains of V. + for (User *U : V->users()) { + Instruction *UserInst = cast<Instruction>(U); + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } +} + +void DivergencePropagator::propagate() { + // Traverse the dependency graph using DFS. + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) { + // Terminators with less than two successors won't introduce sync + // dependency. Ignore them. + if (TI->getNumSuccessors() > 1) + exploreSyncDependency(TI); + } + exploreDataDependency(V); + } +} + +} /// end namespace anonymous + +FunctionPass *llvm::createDivergenceAnalysisPass() { + return new DivergenceAnalysis(); +} + +bool DivergenceAnalysis::runOnFunction(Function &F) { + auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); + if (TTIWP == nullptr) + return false; + + TargetTransformInfo &TTI = TTIWP->getTTI(F); + // Fast path: if the target does not have branch divergence, we do not mark + // any branch as divergent. + if (!TTI.hasBranchDivergence()) + return false; + + DivergentValues.clear(); + DivergencePropagator DP(F, TTI, + getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<PostDominatorTree>(), DivergentValues); + DP.populateWithSourcesOfDivergence(); + DP.propagate(); + return false; +} + +void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { + if (DivergentValues.empty()) + return; + const Value *FirstDivergentValue = *DivergentValues.begin(); + const Function *F; + if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { + F = Arg->getParent(); + } else if (const Instruction *I = + dyn_cast<Instruction>(FirstDivergentValue)) { + F = I->getParent()->getParent(); + } else { + llvm_unreachable("Only arguments and instructions can be divergent"); + } + + // Dumps all divergent values in F, arguments and then instructions. + for (auto &Arg : F->args()) { + if (DivergentValues.count(&Arg)) + OS << "DIVERGENT: " << Arg << "\n"; + } + // Iterate instructions using inst_range to ensure a deterministic order. 
+ for (auto &I : inst_range(F)) { + if (DivergentValues.count(&I)) + OS << "DIVERGENT:" << I << "\n"; + } +} diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 9d607cc..65ba1c7 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -212,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // list of the same call. CallSites.count(I->first) || - // If the call edge is not from a call or invoke, then the function - // pass RAUW'd a call with another value. This can happen when - // constant folding happens of well known functions etc. - !CallSite(I->first)) { + // If the call edge is not from a call or invoke, or it is a + // instrinsic call, then the function pass RAUW'd a call with + // another value. This can happen when constant folding happens + // of well known functions etc. + !CallSite(I->first) || + (CallSite(I->first).getCalledFunction() && + CallSite(I->first).getCalledFunction()->isIntrinsic())) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 2208f32..018ae99 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (Operator::getOpcode(I) == Instruction::BitCast) { if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) return true; - } else if (CallSite CS = I) { + } else if (auto CS = CallSite(I)) { // Make sure that this is just the function being called, not that it is // passing into the function. if (!CS.isCallee(&U)) { diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index eeb3b87..cacf70d 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -64,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ContainsNoDuplicateCall; bool HasReturn; bool HasIndirectBr; + bool HasFrameEscape; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -148,12 +149,12 @@ public: IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -743,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. return false; + case Intrinsic::frameescape: + HasFrameEscape = true; + return false; } } @@ -941,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // If the visit this instruction detected an uninlinable pattern, abort. 
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1171,7 +1175,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // returns false, and we can bail on out. if (!analyzeBlock(BB, EphValues)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1286,16 +1290,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { /// \brief Test that two functions either have or have not the given attribute /// at the same time. -static bool attributeMatches(Function *F1, Function *F2, - Attribute::AttrKind Attr) { - return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +template<typename AttrKind> +static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { + return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); } /// \brief Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee) { - return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + return attributeMatches(Caller, Callee, "target-cpu") && + attributeMatches(Caller, Callee, "target-features") && + attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && attributeMatches(Caller, Callee, Attribute::SanitizeThread); } @@ -1370,6 +1376,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (!ReturnsTwice && CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; + + // Disallow inlining functions that call @llvm.frameescape. Doing this + // correctly would require major changes to the inliner. + if (CS.getCalledFunction() && + CS.getCalledFunction()->getIntrinsicID() == + llvm::Intrinsic::frameescape) + return false; } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 99c477d..d45f7bd 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2978,10 +2978,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); - Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + Constant *NewLHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesLHS); SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); - Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + Constant *NewRHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesRHS); return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); } } @@ -3241,17 +3243,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { +static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const Query &Q, unsigned) { // The type of the GEP pointer operand. 
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType()); - unsigned AS = PtrTy->getAddressSpace(); + unsigned AS = + cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); // getelementptr P -> P. if (Ops.size() == 1) return Ops[0]; // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); @@ -3264,7 +3267,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (match(Ops[1], m_Zero())) return Ops[0]; - Type *Ty = PtrTy->getElementType(); + Type *Ty = SrcTy; if (Ty->isSized()) { Value *P; uint64_t C; @@ -3318,14 +3321,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (!isa<Constant>(Ops[i])) return nullptr; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), + Ops.slice(1)); } Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst( + cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(), + Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 1818e93..724c21f 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,6 +177,17 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; +} + namespace { /// \brief Analyses memory accesses in a loop. /// @@ -1033,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { StoreInst *ST = cast<StoreInst>(*I); Value* Ptr = ST->getPointerOperand(); - - if (isUniform(Ptr)) { - emitAnalysis( - LoopAccessReport(ST) - << "write to a loop invariant address could not be vectorized"); - DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n"); - CanVecMem = false; - return; - } - + // Check for store to loop invariant address. + StoreToLoopInvariantAddress |= isUniform(Ptr); // If we did *not* see this pointer before, insert it to the read-write // list. At this phase it is only a 'write' list. 
if (Seen.insert(Ptr).second) { @@ -1211,9 +1214,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - Instruction *tnullptr = nullptr; if (!PtrRtCheck.Need) - return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); + return std::make_pair(nullptr, nullptr); unsigned NumPointers = PtrRtCheck.Pointers.size(); SmallVector<TrackingVH<Value> , 2> Starts; @@ -1284,6 +1286,9 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( } } + if (!MemoryRuntimeCheck) + return std::make_pair(nullptr, nullptr); + // We have to do this trickery because the IRBuilder might fold the check to a // constant expression in which case there is no Instruction anchored in a // the block. @@ -1301,19 +1306,24 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const ValueToValueMap &Strides) : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false) { + MaxSafeDepDistBytes(-1U), CanVecMem(false), + StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.empty()) - OS.indent(Depth) << "Memory dependences are safe\n"; - else + if (PtrRtCheck.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; + else + OS.indent(Depth) << "Memory dependences are safe\n"; } + OS.indent(Depth) << "Store to invariant address was " + << (StoreToLoopInvariantAddress ? "" : "not ") + << "found in loop.\n"; + if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index e1b7b4b..da3b829 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(nullptr))); - } else if (CallSite CS = cast<Value>(Inst)) { + } else if (auto CS = CallSite(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 716e3e6..84769cb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -223,7 +223,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, continue; } - if (CallSite InstCS = cast<Value>(Inst)) { + if (auto InstCS = CallSite(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. 
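A pattern worth noting: the MemDepPrinter and MemoryDependenceAnalysis hunks just above, like the AliasAnalysisEvaluator, AliasSetTracker, and GlobalsModRef hunks earlier, all replace implicit conversions such as "CallSite CS = cast<Value>(Inst)" with explicit construction followed by a validity test. Distilled into a standalone sketch (illustrative only, mirroring those hunks):

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static Function *getDirectCallee(Instruction *Inst) {
      // CallSite(Inst) is null -- and tests false -- when Inst is neither a
      // call nor an invoke; the old code spelled this as an implicit
      // conversion from a Value.
      if (auto CS = CallSite(Inst))
        return CS.getCalledFunction(); // still null for indirect calls
      return nullptr;
    }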
@@ -874,23 +874,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - - auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) { - if (auto *I = dyn_cast<LoadInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<StoreInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<VAArgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) - return AA->getLocation(I); - else - llvm_unreachable("unsupported memory instruction"); - }; - - const AliasAnalysis::Location Loc = getLocation(AA, QueryInst); + const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index cbc4700..f2a11cb 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -72,55 +72,53 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { // Printing the nodes directly isn't particularly helpful (since they // reference other nodes that won't be printed, particularly for the // filenames), so just print a few useful things. - for (DICompileUnit CU : Finder.compile_units()) { + for (MDCompileUnit *CU : Finder.compile_units()) { O << "Compile unit: "; - if (const char *Lang = LanguageString(CU.getLanguage())) + if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage())) O << Lang; else - O << "unknown-language(" << CU.getLanguage() << ")"; - printFile(O, CU.getFilename(), CU.getDirectory()); + O << "unknown-language(" << CU->getSourceLanguage() << ")"; + printFile(O, CU->getFilename(), CU->getDirectory()); O << '\n'; } - for (DISubprogram S : Finder.subprograms()) { - O << "Subprogram: " << S.getName(); - printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber()); - if (!S.getLinkageName().empty()) - O << " ('" << S.getLinkageName() << "')"; + for (MDSubprogram *S : Finder.subprograms()) { + O << "Subprogram: " << S->getName(); + printFile(O, S->getFilename(), S->getDirectory(), S->getLine()); + if (!S->getLinkageName().empty()) + O << " ('" << S->getLinkageName() << "')"; O << '\n'; } for (DIGlobalVariable GV : Finder.global_variables()) { - O << "Global variable: " << GV.getName(); - printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber()); - if (!GV.getLinkageName().empty()) - O << " ('" << GV.getLinkageName() << "')"; + O << "Global variable: " << GV->getName(); + printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); + if (!GV->getLinkageName().empty()) + O << " ('" << GV->getLinkageName() << "')"; O << '\n'; } - for (DIType T : Finder.types()) { + for (const MDType *T : Finder.types()) { O << "Type:"; - if (!T.getName().empty()) - O << ' ' << T.getName(); - printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber()); - if (T.isBasicType()) { - DIBasicType BT(T.get()); + if (!T->getName().empty()) + O << ' ' << T->getName(); + printFile(O, T->getFilename(), T->getDirectory(), T->getLine()); + if (auto *BT = dyn_cast<MDBasicType>(T)) { O << " "; if (const char *Encoding = - dwarf::AttributeEncodingString(BT.getEncoding())) + dwarf::AttributeEncodingString(BT->getEncoding())) O << Encoding; else - O << 
"unknown-encoding(" << BT.getEncoding() << ')'; + O << "unknown-encoding(" << BT->getEncoding() << ')'; } else { O << ' '; - if (const char *Tag = dwarf::TagString(T.getTag())) + if (const char *Tag = dwarf::TagString(T->getTag())) O << Tag; else - O << "unknown-tag(" << T.getTag() << ")"; + O << "unknown-tag(" << T->getTag() << ")"; } - if (T.isCompositeType()) { - DICompositeType CT(T.get()); - if (auto *S = CT.getIdentifier()) + if (auto *CT = dyn_cast<MDCompositeType>(T)) { + if (auto *S = CT->getRawIdentifier()) O << " (identifier: '" << S->getString() << "')"; } O << '\n'; diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index cd1e944..5e1cdd4 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -199,7 +199,7 @@ public: bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; - for (const auto &BB : R->blocks()) { + for (const auto *BB : R->blocks()) { if (BB) BB->print(Out); else diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index ad83113..d7f5109 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo()); - for (const auto &BB : R.blocks()) + for (auto *BB : R.blocks()) if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB)) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 4e713fb..37377f0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5690,7 +5690,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); - C = ConstantExpr::getGetElementPtr(C, C2); + C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); } else C = ConstantExpr::getAdd(C, C2); } @@ -6698,6 +6698,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } + struct ClearWalkingBEDominatingCondsOnExit { + ScalarEvolution &SE; + + explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) + : SE(SE){}; + + ~ClearWalkingBEDominatingCondsOnExit() { + SE.WalkingBEDominatingConds = false; + } + }; + + // We don't want more than one activation of the following loop on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + WalkingBEDominatingConds = true; + ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); + + // If the loop is not reachable from the entry block, we risk running into an + // infinite loop as we walk up into the dom tree. These loops do not matter + // anyway, so we just return a conservative answer when we see them. 
+ if (!DT->isReachableFromEntry(L->getHeader())) + return false; + + for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; + DTN != HeaderDTN; + DTN = DTN->getIDom()) { + + assert(DTN && "should reach the loop header before reaching the root!"); + + BasicBlock *BB = DTN->getBlock(); + BasicBlock *PBB = BB->getSinglePredecessor(); + if (!PBB) + continue; + + BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!ContinuePredicate || !ContinuePredicate->isConditional()) + continue; + + Value *Condition = ContinuePredicate->getCondition(); + + // If we have an edge `E` within the loop body that dominates the only + // latch, the condition guarding `E` also guards the backedge. This + // reasoning works only for loops with a single latch. + + BasicBlockEdge DominatingEdge(PBB, BB); + if (DominatingEdge.isSingleEdge()) { + // We're constructively (and conservatively) enumerating edges within the + // loop body that dominate the latch. The dominator tree better agree + // with us on this: + assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + + if (isImpliedCond(Pred, LHS, RHS, Condition, + BB != ContinuePredicate->getSuccessor(0))) + return true; + } + } + return false; } @@ -7968,8 +8027,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), - BlockDispositions(64), FirstUnknown(nullptr) { + : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), + LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -8000,6 +8059,7 @@ void ScalarEvolution::releaseMemory() { } assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index a73ec9e..0bd427b 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -488,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, CRHS); + return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), + CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -523,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. 
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); rememberInstruction(GEP); return GEP; @@ -1803,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } +bool SCEVExpander::isHighCostExpansionHelper( + const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + if (!Processed.insert(S).second) + return false; + + if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { + // If the divisor is a power of two and the SCEV type fits in a native + // integer, consider the divison cheap irrespective of whether it occurs in + // the user code since it can be lowered into a right shift. + if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) + if (SC->getValue()->getValue().isPowerOf2()) { + const DataLayout &DL = + L->getHeader()->getParent()->getParent()->getDataLayout(); + unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); + return DL.isIllegalInteger(Width); + } + + // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or + // HowManyLessThans produced to compute a precise expression, rather than a + // UDiv from the user's code. If we can't find a UDiv in the code with some + // simple searching, assume the former consider UDivExpr expensive to + // compute. + BasicBlock *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return true; + + BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!ExitingBI || !ExitingBI->isConditional()) + return true; + + ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition()); + if (!OrigCond) + return true; + + const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1)); + RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1)); + if (RHS != S) { + const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0)); + LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1)); + if (LHS != S) + return true; + } + } + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansionHelper(*I, L, Processed)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV pattern, assume it's an + // expression produced by program code. + return false; +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 7e574d5..8b378a3 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -409,9 +409,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. 
- if (funcName.front() == '\01') - funcName = funcName.substr(1); - return funcName; + return GlobalValue::getRealLinkageName(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index f51c7f54..a1519de 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -76,6 +76,10 @@ bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } +bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { + return TTIImpl->isSourceOfDivergence(V); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f329e3a..3651301 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -694,10 +694,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(isa<IntrinsicInst>(I) && - dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q)) { @@ -2935,7 +2934,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 3bf090a..a72f713 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -598,6 +598,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(inalloca); KEYWORD(cold); KEYWORD(dereferenceable); + KEYWORD(dereferenceable_or_null); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 3343168..90bf17d 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -45,7 +45,6 @@ namespace llvm { public: explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C); - ~LLLexer() {} lltok::Kind Lex() { return CurKind = LexToken(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 103c8c4..546363b 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -976,6 +976,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, break; case lltok::kw_byval: case lltok::kw_dereferenceable: + case lltok::kw_dereferenceable_or_null: case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_noalias: @@ -1220,11 +1221,18 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; case lltok::kw_dereferenceable: { uint64_t Bytes; - if (ParseOptionalDereferenceableBytes(Bytes)) + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) return true; B.addDereferenceableAttr(Bytes); continue; } + case lltok::kw_dereferenceable_or_null: { + uint64_t Bytes; + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes)) + return true; + B.addDereferenceableOrNullAttr(Bytes); + continue; + } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: 
B.addAttribute(Attribute::InReg); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; @@ -1284,11 +1292,18 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { return HaveError; case lltok::kw_dereferenceable: { uint64_t Bytes; - if (ParseOptionalDereferenceableBytes(Bytes)) + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) return true; B.addDereferenceableAttr(Bytes); continue; } + case lltok::kw_dereferenceable_or_null: { + uint64_t Bytes; + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes)) + return true; + B.addDereferenceableOrNullAttr(Bytes); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1516,12 +1531,19 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } -/// ParseOptionalDereferenceableBytes +/// ParseOptionalDerefAttrBytes /// ::= /* empty */ -/// ::= 'dereferenceable' '(' 4 ')' -bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) { +/// ::= AttrKind '(' 4 ')' +/// +/// where AttrKind is either 'dereferenceable' or 'dereferenceable_or_null'. +bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, + uint64_t &Bytes) { + assert((AttrKind == lltok::kw_dereferenceable || + AttrKind == lltok::kw_dereferenceable_or_null) && + "contract!"); + Bytes = 0; - if (!EatIfPresent(lltok::kw_dereferenceable)) + if (!EatIfPresent(AttrKind)) return false; LocTy ParenLoc = Lex.getLoc(); if (!EatIfPresent(lltok::lparen)) @@ -2831,10 +2853,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { !BasePointerType->getElementType()->isSized(&Visited)) return Error(ID.Loc, "base element of getelementptr must be sized"); - if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices)) + if (!GetElementPtrInst::getIndexedType(Ty, Indices)) return Error(ID.Loc, "invalid getelementptr indices"); - ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices, - InBounds); + ID.ConstantVal = + ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds); } else if (Opc == Instruction::Select) { if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to select"); @@ -3030,13 +3052,17 @@ struct MDBoolField : public MDFieldImpl<bool> { MDBoolField(bool Default = false) : ImplTy(Default) {} }; struct MDField : public MDFieldImpl<Metadata *> { - MDField() : ImplTy(nullptr) {} + bool AllowNull; + + MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {} }; struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> { MDConstant() : ImplTy(nullptr) {} }; -struct MDStringField : public MDFieldImpl<std::string> { - MDStringField() : ImplTy(std::string()) {} +struct MDStringField : public MDFieldImpl<MDString *> { + bool AllowEmpty; + MDStringField(bool AllowEmpty = true) + : ImplTy(nullptr), AllowEmpty(AllowEmpty) {} }; struct MDFieldList : public MDFieldImpl<SmallVector<Metadata *, 4>> { MDFieldList() : ImplTy(SmallVector<Metadata *, 4>()) {} @@ -3161,7 +3187,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) { if (Lex.getKind() != lltok::DIFlag) return TokError("expected debug info flag"); - Val = DIDescriptor::getFlag(Lex.getStrVal()); + Val = DebugNode::getFlag(Lex.getStrVal()); if (!Val) return TokError(Twine("invalid debug info flag '") + Lex.getStrVal() + "'"); @@ -3221,6 +3247,8 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
MDBoolField &Result) { template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) { if (Lex.getKind() == lltok::kw_null) { + if (!Result.AllowNull) + return TokError("'" + Name + "' cannot be null"); Lex.Lex(); Result.assign(nullptr); return false; @@ -3246,11 +3274,15 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDConstant &Result) { template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) { + LocTy ValueLoc = Lex.getLoc(); std::string S; if (ParseStringConstant(S)) return true; - Result.assign(std::move(S)); + if (!Result.AllowEmpty && S.empty()) + return Error(ValueLoc, "'" + Name + "' cannot be empty"); + + Result.assign(S.empty() ? nullptr : MDString::get(Context, S)); return false; } @@ -3343,7 +3375,7 @@ bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(line, LineField, ); \ OPTIONAL(column, ColumnField, ); \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(inlinedAt, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -3499,7 +3531,7 @@ bool LLParser::ParseMDFile(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseMDCompileUnit(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ - REQUIRED(file, MDField, ); \ + REQUIRED(file, MDField, (/* AllowNull */ false)); \ OPTIONAL(producer, MDStringField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ OPTIONAL(flags, MDStringField, ); \ @@ -3567,7 +3599,7 @@ bool LLParser::ParseMDSubprogram(MDNode *&Result, bool IsDistinct) { /// ::= !MDLexicalBlock(scope: !0, file: !2, line: 7, column: 9) bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(column, ColumnField, ); @@ -3583,7 +3615,7 @@ bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) { /// ::= !MDLexicalBlockFile(scope: !0, file: !2, discriminator: 9) bool LLParser::ParseMDLexicalBlockFile(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(file, MDField, ); \ REQUIRED(discriminator, MDUnsignedField, (0, UINT32_MAX)); PARSE_MD_FIELDS(); @@ -3648,8 +3680,8 @@ bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) { /// declaration: !3) bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \ OPTIONAL(scope, MDField, ); \ - OPTIONAL(name, MDStringField, ); \ OPTIONAL(linkageName, MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ @@ -3670,25 +3702,23 @@ bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) { /// ParseMDLocalVariable: /// ::= !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo", -/// file: !1, line: 7, type: !2, arg: 2, flags: 7, -/// inlinedAt: !3) +/// file: !1, line: 7, type: !2, arg: 2, flags: 7) bool LLParser::ParseMDLocalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(tag, DwarfTagField, ); \ - OPTIONAL(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(name, 
MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(type, MDField, ); \ OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \ - OPTIONAL(flags, DIFlagField, ); \ - OPTIONAL(inlinedAt, MDField, ); + OPTIONAL(flags, DIFlagField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT( - MDLocalVariable, (Context, tag.Val, scope.Val, name.Val, file.Val, - line.Val, type.Val, arg.Val, flags.Val, inlinedAt.Val)); + Result = GET_OR_DISTINCT(MDLocalVariable, + (Context, tag.Val, scope.Val, name.Val, file.Val, + line.Val, type.Val, arg.Val, flags.Val)); return false; } @@ -5130,10 +5160,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - PointerType *PFTy = nullptr; - FunctionType *Ty = nullptr; - if (!(PFTy = dyn_cast<PointerType>(RetType)) || - !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { + FunctionType *Ty = dyn_cast<FunctionType>(RetType); + if (!Ty) { // Pull out the types of all of the arguments... std::vector<Type*> ParamTypes; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) @@ -5143,12 +5171,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(RetTypeLoc, "Invalid result type for LLVM function"); Ty = FunctionType::get(RetType, ParamTypes, false); - PFTy = PointerType::getUnqual(Ty); } // Look up the callee. Value *Callee; - if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; + if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS)) + return true; // Set up the Attribute for the function. SmallVector<AttributeSet, 8> Attrs; @@ -5269,7 +5297,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Lex.Lex(); } - Type *Ty = nullptr; + Type *Ty; LocTy ExplicitTypeLoc = Lex.getLoc(); if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || @@ -5278,8 +5306,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!Val->getType()->isPointerTy() || - !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType()) + if (!Val->getType()->isPointerTy() || !Ty->isFirstClassType()) return Error(Loc, "load operand must be a pointer to a first class type"); if (isAtomic && !Alignment) return Error(Loc, "atomic load must have explicit non-zero alignment"); @@ -5290,7 +5317,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope); + Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope); return AteExtraComma ? 
InstExtraComma : InstNormal; } @@ -5519,7 +5546,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { !BasePointerType->getElementType()->isSized(&Visited)) return Error(Loc, "base element of getelementptr must be sized"); - if (!GetElementPtrInst::getIndexedType(BaseType, Indices)) + if (!GetElementPtrInst::getIndexedType( + cast<PointerType>(BaseType->getScalarType())->getElementType(), + Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ty, Ptr, Indices); if (InBounds) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 5e92e57..117cdcb 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -223,7 +223,7 @@ namespace llvm { bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass); bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); - bool ParseOptionalDereferenceableBytes(uint64_t &Bytes); + bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, AtomicOrdering &Ordering); bool ParseOrdering(AtomicOrdering &Ordering); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index a7aa17c..2bdc53b 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -106,6 +106,7 @@ namespace lltok { kw_inalloca, kw_cold, kw_dereferenceable, + kw_dereferenceable_or_null, kw_inlinehint, kw_inreg, kw_jumptable, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 84753ff..5366f5f 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -16,6 +16,7 @@ #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -218,6 +219,8 @@ class BitcodeReader : public GVMaterializer { /// True if any Metadata block has been materialized. bool IsMetadataMaterialized; + bool StripDebugInfo = false; + public: std::error_code Error(BitcodeError E, const Twine &Message); std::error_code Error(BitcodeError E); @@ -227,7 +230,7 @@ public: DiagnosticHandlerFunction DiagnosticHandler); explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler); - ~BitcodeReader() { FreeState(); } + ~BitcodeReader() override { FreeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -255,6 +258,8 @@ public: /// Materialize any deferred Metadata block. 
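To see where the explicit-pointee-type plumbing in the parser hunks above is headed, here is a minimal sketch of creating a load through the typed LoadInst constructor that ParseLoad now calls; the helper name and the free-standing setup are assumptions for illustration, not part of the patch:

#include "llvm/IR/Instructions.h"
#include <cassert>

// Hypothetical helper mirroring the constructor call in ParseLoad above:
// the result type Ty is spelled explicitly instead of being derived from
// the pointer operand's pointee type.
static llvm::LoadInst *emitTypedLoad(llvm::Type *Ty, llvm::Value *Ptr) {
  assert(Ptr->getType()->isPointerTy() && Ty->isFirstClassType());
  // The caller is expected to insert the new instruction into a block.
  return new llvm::LoadInst(Ty, Ptr, "", /*isVolatile=*/false, /*Align=*/0,
                            llvm::NotAtomic, llvm::CrossThread);
}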
std::error_code materializeMetadata() override; + void setStripDebugInfo() override; + private: std::vector<StructType *> IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); @@ -1093,6 +1098,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::NonNull; case bitc::ATTR_KIND_DEREFERENCEABLE: return Attribute::Dereferenceable; + case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: + return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -1209,6 +1216,8 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { B.addStackAlignmentAttr(Record[++i]); else if (Kind == Attribute::Dereferenceable) B.addDereferenceableAttr(Record[++i]); + else if (Kind == Attribute::DereferenceableOrNull) + B.addDereferenceableOrNullAttr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); @@ -1906,7 +1915,8 @@ std::error_code BitcodeReader::ParseMetadata() { break; } case bitc::METADATA_LOCAL_VAR: { - if (Record.size() != 10) + // 10th field is for the obsoleted 'inlinedAt:' field. + if (Record.size() != 9 && Record.size() != 10) return Error("Invalid record"); MDValueList.AssignValue( @@ -1914,7 +1924,7 @@ (Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], - Record[8], getMDOrNull(Record[9]))), + Record[8])), NextMDValueNo++); break; } @@ -2308,14 +2318,17 @@ std::error_code BitcodeReader::ParseConstants() { Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } - ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); - V = ConstantExpr::getGetElementPtr(Elts[0], Indices, - BitCode == - bitc::CST_CODE_CE_INBOUNDS_GEP); if (PointeeType && - PointeeType != cast<GEPOperator>(V)->getSourceElementType()) + PointeeType != + cast<SequentialType>(Elts[0]->getType()->getScalarType()) + ->getElementType()) return Error("Explicit gep operator type does not match pointee type " "of pointer operand"); + + ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); + V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, + BitCode == + bitc::CST_CODE_CE_INBOUNDS_GEP); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] @@ -2609,6 +2622,8 @@ std::error_code BitcodeReader::materializeMetadata() { return std::error_code(); } +void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } + /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. @@ -3053,8 +3068,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M, // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { - if (Stream.AtEndOfStream()) - return std::error_code(); + if (Stream.AtEndOfStream()) { + if (TheModule) + return std::error_code(); + // We didn't really read a proper Module. + return Error("Malformed IR file"); + } BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); @@ -4305,6 +4324,9 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { return EC; F->setIsMaterializable(false); + if (StripDebugInfo) + stripDebugInfo(*F); + // Upgrade any old intrinsic calls in the function.
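With the lexer, parser, and bitcode-reader support for dereferenceable_or_null visible above (and the writer side just below), a short sketch of how an API client might attach the new attribute to a parameter; the helper name and the byte count are illustrative assumptions:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Mark the first parameter as either null or pointing to at least 8
// dereferenceable bytes.
static void addDerefOrNull(llvm::Function &F) {
  llvm::AttrBuilder B;
  B.addDereferenceableOrNullAttr(8);
  // Attribute index 0 is the return value; parameters start at index 1.
  F.addAttributes(1, llvm::AttributeSet::get(F.getContext(), 1, B));
}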
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0123fb2..aa4a6a4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -200,6 +200,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NON_NULL; case Attribute::Dereferenceable: return bitc::ATTR_KIND_DEREFERENCEABLE; + case Attribute::DereferenceableOrNull: + return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: @@ -821,7 +823,7 @@ static void WriteMDSubrange(const MDSubrange *N, const ValueEnumerator &, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getCount()); - Record.push_back(rotateSign(N->getLo())); + Record.push_back(rotateSign(N->getLowerBound())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); @@ -892,10 +894,10 @@ static void WriteMDCompositeType(const MDCompositeType *N, Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getElements())); + Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Record.push_back(N->getRuntimeLang()); Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); + Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier())); Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev); @@ -909,7 +911,7 @@ static void WriteMDSubroutineType(const MDSubroutineType *N, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getTypeArray())); + Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get())); Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev); Record.clear(); @@ -940,11 +942,11 @@ static void WriteMDCompileUnit(const MDCompileUnit *N, Record.push_back(N->getRuntimeVersion()); Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename())); Record.push_back(N->getEmissionKind()); - Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes())); - Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes())); - Record.push_back(VE.getMetadataOrNullID(N->getSubprograms())); - Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables())); - Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities())); + Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get())); + Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get())); + Record.push_back(VE.getMetadataOrNullID(N->getSubprograms().get())); + Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); + Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); @@ -970,10 +972,10 @@ static void WriteMDSubprogram(const MDSubprogram *N, Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); - Record.push_back(VE.getMetadataOrNullID(N->getFunction())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); + Record.push_back(VE.getMetadataOrNullID(N->getRawFunction())); + 
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); - Record.push_back(VE.getMetadataOrNullID(N->getVariables())); + Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); @@ -1064,7 +1066,7 @@ static void WriteMDGlobalVariable(const MDGlobalVariable *N, Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); - Record.push_back(VE.getMetadataOrNullID(N->getVariable())); + Record.push_back(VE.getMetadataOrNullID(N->getRawVariable())); Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration())); Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev); @@ -1085,7 +1087,6 @@ static void WriteMDLocalVariable(const MDLocalVariable *N, Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->getArg()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt())); Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev); Record.clear(); @@ -2047,6 +2048,9 @@ static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, BitstreamWriter &Stream) { + assert(VE.shouldPreserveUseListOrder() && + "Expected to be preserving use-list order"); + auto hasMore = [&]() { return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; }; @@ -2089,7 +2093,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, bool NeedsMetadataAttachment = false; - DebugLoc LastDL; + MDLocation *LastDL = nullptr; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2104,26 +2108,22 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. - DebugLoc DL = I->getDebugLoc(); - if (DL.isUnknown()) { - // nothing todo. - } else if (DL == LastDL) { + MDLocation *DL = I->getDebugLoc(); + if (!DL) + continue; + + if (DL == LastDL) { // Just repeat the same debug loc as last time. Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); - } else { - MDNode *Scope, *IA; - DL.getScopeAndInlinedAt(Scope, IA, I->getContext()); - assert(Scope && "Expected valid scope"); - - Vals.push_back(DL.getLine()); - Vals.push_back(DL.getCol()); - Vals.push_back(VE.getMetadataOrNullID(Scope)); - Vals.push_back(VE.getMetadataOrNullID(IA)); - Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); - Vals.clear(); - - LastDL = DL; + continue; } + + Vals.push_back(DL->getLine()); + Vals.push_back(DL->getColumn()); + Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); + Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); + Vals.clear(); } // Emit names for all the instructions etc. @@ -2131,7 +2131,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, if (NeedsMetadataAttachment) WriteMetadataAttachment(F, VE, Stream); - if (shouldPreserveBitcodeUseListOrder()) + if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); @@ -2313,7 +2313,8 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { } /// WriteModule - Emit the specified module to the bitstream. 
-static void WriteModule(const Module *M, BitstreamWriter &Stream) { +static void WriteModule(const Module *M, BitstreamWriter &Stream, + bool ShouldPreserveUseListOrder) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector<unsigned, 1> Vals; @@ -2322,7 +2323,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. - ValueEnumerator VE(*M); + ValueEnumerator VE(*M, ShouldPreserveUseListOrder); // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); @@ -2355,7 +2356,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); // Emit module-level use-lists. - if (shouldPreserveBitcodeUseListOrder()) + if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(nullptr, VE, Stream); // Emit function bodies. @@ -2441,7 +2442,8 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer, /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. -void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { +void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, + bool ShouldPreserveUseListOrder) { SmallVector<char, 0> Buffer; Buffer.reserve(256*1024); @@ -2464,7 +2466,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { Stream.Emit(0xD, 4); // Emit the module. - WriteModule(M, Stream); + WriteModule(M, Stream, ShouldPreserveUseListOrder); } if (TT.isOSDarwin()) diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 25456a4..3165743 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,22 +19,25 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M) { - WriteBitcodeToFile(&M, OS); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); return PreservedAnalyses::all(); } namespace { class WriteBitcodePass : public ModulePass { raw_ostream &OS; // raw_ostream to print on + bool ShouldPreserveUseListOrder; + public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(raw_ostream &o) - : ModulePass(ID), OS(o) {} + explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder) + : ModulePass(ID), OS(o), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} const char *getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - WriteBitcodeToFile(&M, OS); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); return false; } }; @@ -42,6 +45,7 @@ namespace { char WriteBitcodePass::ID = 0; -ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) { - return new WriteBitcodePass(Str); +ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str, + bool ShouldPreserveUseListOrder) { + return new WriteBitcodePass(Str, ShouldPreserveUseListOrder); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 549e94f..7f576d7 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -283,9 +283,11 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { return V.first->getType()->isIntOrIntVectorTy(); } -ValueEnumerator::ValueEnumerator(const Module &M) - : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false) { - if 
(shouldPreserveBitcodeUseListOrder()) +ValueEnumerator::ValueEnumerator(const Module &M, + bool ShouldPreserveUseListOrder) + : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + if (ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); // Enumerate the global variables. @@ -373,12 +375,10 @@ ValueEnumerator::ValueEnumerator(const Module &M) for (unsigned i = 0, e = MDs.size(); i != e; ++i) EnumerateMetadata(MDs[i].second); - if (!I.getDebugLoc().isUnknown()) { - MDNode *Scope, *IA; - I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext()); - if (Scope) EnumerateMetadata(Scope); - if (IA) EnumerateMetadata(IA); - } + // Don't enumerate the location directly -- it has a special record + // type -- but enumerate its operands. + if (MDLocation *L = I.getDebugLoc()) + EnumerateMDNodeOperands(L); } } @@ -463,7 +463,7 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map, void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { if (CstStart == CstEnd || CstStart+1 == CstEnd) return; - if (shouldPreserveBitcodeUseListOrder()) + if (ShouldPreserveUseListOrder) // Optimizing constants makes the use-list order difficult to predict. // Disable it for now when trying to preserve the order. return; diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index b94c370..ba245a3 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -67,6 +67,7 @@ private: bool HasMDString; bool HasMDLocation; bool HasGenericDebugNode; + bool ShouldPreserveUseListOrder; typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType; AttributeGroupMapType AttributeGroupMap; @@ -102,7 +103,7 @@ private: ValueEnumerator(const ValueEnumerator &) = delete; void operator=(const ValueEnumerator &) = delete; public: - ValueEnumerator(const Module &M); + ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder); void dump() const; void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; @@ -123,6 +124,8 @@ public: bool hasMDLocation() const { return HasMDLocation; } bool hasGenericDebugNode() const { return HasGenericDebugNode; } + bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; } + unsigned getTypeID(Type *T) const { TypeMapType::const_iterator I = TypeMap.find(T); assert(I != TypeMap.end() && "Type not in ValueEnumerator!"); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 12cf95b..f9544dd 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -127,7 +127,7 @@ class RegisterClassInfo; AggressiveAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); - ~AggressiveAntiDepBreaker(); + ~AggressiveAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. 
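Taken together, the writer-side hunks above replace the global shouldPreserveBitcodeUseListOrder() flag with a ShouldPreserveUseListOrder parameter threaded from the public entry points through WriteModule into the ValueEnumerator. A minimal usage sketch, with the output filename assumed:

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

// Serialize M and ask the writer to emit use-list order blocks, so that
// reloading the bitcode reproduces the order of each value's uses.
static std::error_code emitBitcode(const llvm::Module *M) {
  std::error_code EC;
  llvm::raw_fd_ostream OS("out.bc", EC, llvm::sys::fs::F_None);
  if (EC)
    return EC;
  llvm::WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/true);
  return std::error_code();
}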
void StartBlock(MachineBasicBlock *BB) override; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 07d6731..43d7a38 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" @@ -671,17 +671,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << "DEBUG_VALUE: "; DIVariable V = MI->getDebugVariable(); - if (V.getContext().isSubprogram()) { - StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (auto *SP = dyn_cast<MDSubprogram>(V->getScope())) { + StringRef Name = SP->getDisplayName(); if (!Name.empty()) OS << Name << ":"; } - OS << V.getName(); + OS << V->getName(); DIExpression Expr = MI->getDebugExpression(); - if (Expr.isBitPiece()) - OS << " [bit_piece offset=" << Expr.getBitPieceOffset() - << " size=" << Expr.getBitPieceSize() << "]"; + if (Expr->isBitPiece()) + OS << " [bit_piece offset=" << Expr->getBitPieceOffset() + << " size=" << Expr->getBitPieceSize() << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. @@ -1034,11 +1034,31 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOF.getDataRelSection()); + const DataLayout *DL = TM.getDataLayout(); + + for (const auto &Stub : Stubs) { + OutStreamer.EmitLabel(Stub.first); + OutStreamer.EmitSymbolValue(Stub.second.getPointer(), + DL->getPointerSize()); + } + } + } // Make sure we wrote out everything we need. OutStreamer.Flush(); @@ -2302,7 +2322,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); - return OutContext.GetOrCreateSymbol(NameStr.str()); + return OutContext.GetOrCreateSymbol(NameStr); } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index bbdf237..1e3c5d7 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -33,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } -void DbgValueHistoryMap::startInstrRange(const MDNode *Var, +void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. 
@@ -48,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, Ranges.push_back(std::make_pair(&MI, nullptr)); } -void DbgValueHistoryMap::endInstrRange(const MDNode *Var, +void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, const MachineInstr &MI) { auto &Ranges = VarInstrRanges[Var]; // Verify that the current instruction range is not yet closed. @@ -59,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var, Ranges.back().second = &MI; } -unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { const auto &I = VarInstrRanges.find(Var); if (I == VarInstrRanges.end()) return 0; @@ -71,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { namespace { // Maps physreg numbers to the variables they describe. -typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap; +typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; +typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; } // \brief Claim that @Var is not described by @RegNo anymore. -static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; @@ -89,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, } // \brief Claim that @Var is now described by @RegNo. -static void addRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); @@ -203,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // Use the base variable (without any DW_OP_piece expressions) // as index into History. The full variables including the // piece expressions are attached to the MI. 
- DIVariable Var = MI.getDebugVariable(); + MDLocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 4b62007..c25aaff 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -17,7 +17,8 @@ namespace llvm { class MachineFunction; class MachineInstr; -class MDNode; +class MDLocalVariable; +class MDLocation; class TargetRegisterInfo; // For each user variable, keep a list of instruction ranges where this variable @@ -31,16 +32,19 @@ class DbgValueHistoryMap { public: typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; typedef SmallVector<InstrRange, 4> InstrRanges; - typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; + typedef std::pair<const MDLocalVariable *, const MDLocation *> + InlinedVariable; + typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + private: InstrRangesMap VarInstrRanges; public: - void startInstrRange(const MDNode *Var, const MachineInstr &MI); - void endInstrRange(const MDNode *Var, const MachineInstr &MI); + void startInstrRange(InlinedVariable Var, const MachineInstr &MI); + void endInstrRange(InlinedVariable Var, const MachineInstr &MI); // Returns register currently describing @Var. If @Var is currently // inaccessible or is not described by a register, returns 0. - unsigned getRegisterForVar(const MDNode *Var) const; + unsigned getRegisterForVar(InlinedVariable Var) const; bool empty() const { return VarInstrRanges.empty(); } void clear() { VarInstrRanges.clear(); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 6914bbe..4f6714e 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -42,8 +42,8 @@ public: } Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(DIVariable(Var).Verify()); - assert(DIExpression(Expr)->isValid()); + assert(isa<MDLocalVariable>(Var)); + assert(cast<MDExpression>(Expr)->isValid()); } /// The variable to which this location entry corresponds. @@ -74,10 +74,11 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariableNode() const { return Variable; } - DIVariable getVariable() const { return DIVariable(Variable); } - bool isBitPiece() const { return getExpression().isBitPiece(); } - DIExpression getExpression() const { return DIExpression(Expression); } + DIVariable getVariable() const { return cast<MDLocalVariable>(Variable); } + bool isBitPiece() const { return getExpression()->isBitPiece(); } + DIExpression getExpression() const { + return cast_or_null<MDExpression>(Expression); + } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); }; @@ -101,12 +102,12 @@ public: /// Return true if the merge was successful.
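One consequence of the InlinedVariable typedef above deserves a note: keying the history map on a (variable, inlined-at) pair gives each inlined instance of the same source variable its own instruction ranges. A small sketch of why the keys stay distinct; the argument values are hypothetical:

#include "llvm/IR/DebugInfoMetadata.h"
#include <cassert>
#include <utility>

// Mirrors DbgValueHistoryMap::InlinedVariable from the header above.
typedef std::pair<const llvm::MDLocalVariable *, const llvm::MDLocation *>
    InlinedVariable;

static void checkDistinctKeys(const llvm::MDLocalVariable *Var,
                              const llvm::MDLocation *CallSite) {
  InlinedVariable OutOfLine(Var, nullptr); // the non-inlined instance
  InlinedVariable Inlined(Var, CallSite);  // an instance inlined at CallSite
  // std::pair compares element-wise, so the same variable inlined at
  // different call sites maps to different history entries.
  assert((CallSite == nullptr || OutOfLine != Inlined) && "distinct keys");
}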
bool MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { - DIExpression Expr(Values[0].Expression); - DIVariable Var(Values[0].Variable); - DIExpression NextExpr(Next.Values[0].Expression); - DIVariable NextVar(Next.Values[0].Variable); - if (Var == NextVar && Expr.isBitPiece() && - NextExpr.isBitPiece()) { + DIExpression Expr = cast_or_null<MDExpression>(Values[0].Expression); + DIVariable Var = cast_or_null<MDLocalVariable>(Values[0].Variable); + DIExpression NextExpr = + cast_or_null<MDExpression>(Next.Values[0].Expression); + DIVariable NextVar = cast_or_null<MDLocalVariable>(Next.Values[0].Variable); + if (Var == NextVar && Expr->isBitPiece() && NextExpr->isBitPiece()) { addValues(Next.Values); End = Next.End; return true; @@ -189,8 +190,8 @@ inline bool operator==(const DebugLocEntry::Value &A, /// \brief Compare two pieces based on their offset. inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression().getBitPieceOffset() < - B.getExpression().getBitPieceOffset(); + return A.getExpression()->getBitPieceOffset() < + B.getExpression()->getBitPieceOffset(); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index eee5fc5..75d3b68 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -101,51 +101,52 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { if (DIE *Die = getDIE(GV)) return Die; - assert(GV.isGlobalVariable()); + assert(GV); - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); + DIScope GVContext = GV->getScope(); + DIType GTy = DD->resolve(GV->getType()); // Construct the context before querying for the existence of the DIE in // case such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(GVContext); // Add to map. - DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); DIScope DeclContext; - if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { - DeclContext = resolve(SDMDecl.getContext()); - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - assert(GV.isDefinition()); + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl->getScope()); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); // We need the declaration DIE that is in the static member's class. DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); } else { - DeclContext = GV.getContext(); + DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); addType(*VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) + if (!GV->isLocalToUnit()) addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(*VariableDIE, GV); } - if (!GV.isDefinition()) + if (!GV->isDefinition()) addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); // Add location. 
bool addToAccelTable = false; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { + if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { addToAccelTable = true; DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && @@ -174,11 +175,11 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - addLinkageName(*VariableDIE, GV.getLinkageName()); + addLinkageName(*VariableDIE, GV->getLinkageName()); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV->getVariable())) { addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { addToAccelTable = true; // GV is a merged global. DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -195,15 +196,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } if (addToAccelTable) { - DD->addAccelName(GV.getName(), *VariableDIE); + DD->addAccelName(GV->getName(), *VariableDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - DD->addAccelName(GV.getLinkageName(), *VariableDIE); + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + DD->addAccelName(GV->getLinkageName(), *VariableDIE); } - addGlobalName(GV.getName(), *VariableDIE, DeclContext); return VariableDIE; } @@ -307,7 +307,7 @@ void DwarfCompileUnit::constructScopeDIE( DIScope DS(Scope->getScopeNode()); - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + assert((Scope->getInlinedAt() || !isa<MDSubprogram>(DS)) && "Only handle inlined subprograms here, use " "constructSubprogramScopeDIE for non-inlined " "subprograms"); @@ -318,7 +318,7 @@ void DwarfCompileUnit::constructScopeDIE( // avoid creating un-used children then removing them later when we find out // the scope DIE is null. std::unique_ptr<DIE> ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { + if (Scope->getParent() && isa<MDSubprogram>(DS)) { ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; @@ -340,7 +340,7 @@ void DwarfCompileUnit::constructScopeDIE( // There is no need to emit empty lexical block DIE. for (const auto &E : DD->findImportedEntitiesForScope(DS)) Children.push_back( - constructImportedEntityDIE(DIImportedEntity(E.second))); + constructImportedEntityDIE(cast<MDImportedEntity>(E.second))); } // If there are only other scopes as children, put them directly in the @@ -431,10 +431,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); // Add the call site information to the DIE. 
- DILocation DL(Scope->getInlinedAt()); + const MDLocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); // Add name to the name table; we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -523,7 +523,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); - DwarfExpr.AddExpression(Expr->begin(), Expr->end()); + DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); ++Expr; } addBlock(*VariableDie, dwarf::DW_AT_location, Loc); @@ -562,16 +562,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); + DISubprogram Sub = cast<MDSubprogram>(Scope->getScopeNode()); DD->getProcessedSPNodes().insert(Sub); DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); // If this is a variadic function, add an unspecified parameter. - DITypeArray FnArgs = Sub.getType().getTypeArray(); + DITypeArray FnArgs = Sub->getType()->getTypeArray(); // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a @@ -582,8 +580,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { // If we have a single element of null, it is a function that returns void. // If we have more than one element and the last one is null, it is a // variadic function. - if (FnArgs.getNumElements() > 1 && - !FnArgs.getElement(FnArgs.getNumElements() - 1) && + if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && !includeMinimalInlineScopes()) ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); } @@ -607,7 +604,7 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { if (AbsDef) return; - DISubprogram SP(Scope->getScopeNode()); + DISubprogram SP = cast<MDSubprogram>(Scope->getScopeNode()); DIE *ContextDIE; @@ -617,11 +614,11 @@ // the important distinction that the DIDescriptor is not associated with the // DIE (since the DIDescriptor will be associated with the concrete DIE, if // any). It could be refactored to some common utility function. - else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + else if (auto *SPDecl = SP->getDeclaration()) { ContextDIE = &getUnitDie(); getOrCreateSubprogramDIE(SPDecl); } else - ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); // Passing null as the associated DIDescriptor because the abstract definition // shouldn't be found by lookup.
@@ -637,28 +634,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { std::unique_ptr<DIE> DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag()); + std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag()); insertDIE(Module, IMDie.get()); DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = getOrCreateTypeDIE(DIType(Entity)); - else if (Entity.isGlobalVariable()) - EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + auto *Entity = resolve(Module->getEntity()); + if (auto *NS = dyn_cast<MDNamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *SP = dyn_cast<MDSubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<MDType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<MDGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV); else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); + addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), + Module->getScope()->getDirectory()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); @@ -683,21 +677,19 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { } } void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { - assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && + assert(SP && "CU's subprogram list contains a non-subprogram"); + assert(SP->isDefinition() && "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) + auto Variables = SP->getVariables(); + if (Variables.size() == 0) return; DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); if (!SPDIE) SPDIE = getDIE(SP); assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, DIExpression(), DD); + for (DIVariable DV : Variables) { + DbgVariable NewVar(DV, nullptr, DIExpression(), DD); auto VariableDie = constructVariableDIE(NewVar); applyVariableAttributes(NewVar, *VariableDie); SPDIE->addChild(std::move(VariableDie)); @@ -728,7 +720,7 @@ void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die, DIScope Context) { if (includeMinimalInlineScopes()) return; - std::string FullName = getParentContextString(Context) + Ty.getName().str(); + std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; } @@ -778,7 +770,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), Location.getOffset()); if (ValidReg) - DwarfExpr.AddExpression(Expr.begin(), Expr.end()); + DwarfExpr.AddExpression(Expr->expr_op_begin(), 
Expr->expr_op_end()); } else ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); @@ -816,10 +808,10 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); + auto *SPDecl = SP->getDeclaration(); + DIScope Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); - addGlobalName(SP.getName(), SPDie, Context); + addGlobalName(SP->getName(), SPDie, Context); } bool DwarfCompileUnit::isDwoUnit() const { @@ -827,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const { } bool DwarfCompileUnit::includeMinimalInlineScopes() const { - return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly || + return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e9ebd97..fb8fc6e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -129,20 +129,22 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T *DbgVariable::resolve(TypedDebugNodeRef<T> Ref) const { return DD->resolve(Ref); } bool DbgVariable::isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); + assert(Var && "Invalid complex DbgVariable!"); + return Var->getType() + .resolve(DD->getTypeIdentifierMap()) + ->isBlockByrefStruct(); } DIType DbgVariable::getType() const { - DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); + MDType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { + if (Ty->isBlockByrefStruct()) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -167,17 +169,17 @@ DIType DbgVariable::getType() const { have a DW_AT_location that tells the debugger how to unwind through the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. 
*/ - DIType subType = Ty; - uint16_t tag = Ty.getTag(); + MDType *subType = Ty; + uint16_t tag = Ty->getTag(); if (tag == dwarf::DW_TAG_pointer_type) - subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + subType = resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType())); - DIArray Elements = DICompositeType(subType).getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDerivedType DT(Elements.getElement(i)); - if (getName() == DT.getName()) - return (resolve(DT.getTypeDerivedFrom())); + auto Elements = cast<MDCompositeTypeBase>(subType)->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *DT = cast<MDDerivedTypeBase>(Elements[i]); + if (getName() == DT->getName()) + return resolve(DITypeRef(DT->getBaseType())); } } return Ty; @@ -275,25 +277,25 @@ static StringRef getObjCMethodName(StringRef In) { // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { - if (!SP.isDefinition()) + if (!SP->isDefinition()) return; - addAccelName(SP.getName(), Die); + addAccelName(SP->getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - addAccelName(SP.getLinkageName(), Die); + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + addAccelName(SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. - if (isObjCClass(SP.getName())) { + if (isObjCClass(SP->getName())) { StringRef Class, Category; - getObjCClassCategory(SP.getName(), Class, Category); + getObjCClassCategory(SP->getName(), Class, Category); addAccelObjC(Class, Die); if (Category != "") addAccelObjC(Category, Die); // Also add the base method name to the name table. - addAccelName(getObjCMethodName(SP.getName()), Die); + addAccelName(getObjCMethodName(SP->getName()), Die); } } @@ -302,11 +304,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { bool DwarfDebug::isSubprogramContext(const MDNode *Context) { if (!Context) return false; - DIDescriptor D(Context); - if (D.isSubprogram()) + if (isa<MDSubprogram>(Context)) return true; - if (D.isType()) - return isSubprogramContext(resolve(DIType(Context).getContext())); + if (auto *T = dyn_cast<MDType>(Context)) + return isSubprogramContext(resolve(T->getScope())); return false; } @@ -362,8 +363,8 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { - StringRef FN = DIUnit.getFilename(); - CompilationDir = DIUnit.getDirectory(); + StringRef FN = DIUnit->getFilename(); + CompilationDir = DIUnit->getDirectory(); auto OwnedUnit = make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); @@ -381,9 +382,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { Asm->OutStreamer.getContext().setMCLineTableCompilationDir( NewCU.getUniqueID(), CompilationDir); - NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer()); NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); + DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { @@ -397,14 +398,14 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { addGnuPubAttributes(NewCU, Die); } - if (DIUnit.isOptimized()) + if (DIUnit->isOptimized()) NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); - StringRef Flags = DIUnit.getFlags(); + StringRef Flags = DIUnit->getFlags(); if (!Flags.empty()) NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - if (unsigned RVer = DIUnit.getRunTimeVersion()) + if (unsigned RVer = DIUnit->getRuntimeVersion()) NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); @@ -420,9 +421,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N) { - DIImportedEntity Module(N); - assert(Module.Verify()); - if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) + DIImportedEntity Module = cast<MDImportedEntity>(N); + if (DIE *D = TheCU.getOrCreateContextDIE(Module->getScope())) D->addChild(TheCU.constructImportedEntityDIE(Module)); } @@ -445,44 +445,35 @@ void DwarfDebug::beginModule() { SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); + DICompileUnit CUNode = cast<MDCompileUnit>(N); DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); - DIArray ImportedEntities = CUNode.getImportedEntities(); - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedEntity(ImportedEntities.getElement(i)).getContext(), - ImportedEntities.getElement(i))); + for (auto *IE : CUNode->getImportedEntities()) + ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE)); // Stable sort to preserve the order of appearance of imported entities. // This is to avoid out-of-order processing of interdependent declarations // within the same scope, e.g. 
{ namespace A = base; namespace B = A; } std::stable_sort(ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), less_first()); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { - DIType Ty(EnumTypes.getElement(i)); + for (auto *GV : CUNode->getGlobalVariables()) + CU.getOrCreateGlobalVariableDIE(GV); + for (auto *SP : CUNode->getSubprograms()) + SPMap.insert(std::make_pair(SP, &CU)); + for (DIType Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); + DIType UniqueTy = cast<MDType>(resolve(Ty->getRef())); CU.getOrCreateTypeDIE(UniqueTy); } - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { - DIType Ty(RetainedTypes.getElement(i)); + for (DIType Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); + DIType UniqueTy = cast<MDType>(resolve(Ty->getRef())); CU.getOrCreateTypeDIE(UniqueTy); } // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); } // Tell MMI that we have debug info. @@ -498,7 +489,8 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var->getVariable(), Var->getInlinedAt())); if (AbsVar && AbsVar->getDIE()) { Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); @@ -510,7 +502,7 @@ void DwarfDebug::finishVariableDefinitions() { void DwarfDebug::finishSubprogramDefinitions() { for (const auto &P : SPMap) forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(DISubprogram(P.first)); + CU.finishSubprogramDefinition(cast<MDSubprogram>(P.first)); }); } @@ -521,14 +513,12 @@ void DwarfDebug::collectDeadVariables() { if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); + DICompileUnit TheCU = cast<MDCompileUnit>(N); // Construct subprogram DIE and add variables DIEs. 
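Why the sort above must be stable is worth spelling out: imported entities that share a scope compare equal under less_first, so a stable sort keeps their order of appearance, and "namespace B = A;" is still processed after the "namespace A = base;" it depends on. A self-contained sketch of the idea (the pair contents are stand-ins; llvm::less_first just compares the .first members):

  #include <algorithm>
  #include <utility>
  #include <vector>

  // Stand-in for llvm::less_first: order pairs by their first element only.
  struct LessFirst {
    template <typename T> bool operator()(const T &L, const T &R) const {
      return L.first < R.first;
    }
  };

  int main() {
    // (scope-id, entity): two imported entities in the same scope.
    std::vector<std::pair<int, const char *>> Ents = {
        {2, "A = base"}, {1, "other"}, {2, "B = A"}};
    // std::stable_sort groups by scope but never swaps equal keys, so the
    // dependent "B = A" still follows "A = base" after sorting.
    std::stable_sort(Ents.begin(), Ents.end(), LessFirst());
    return 0;
  }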
DwarfCompileUnit *SPCU = static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); + for (auto *SP : TheCU->getSubprograms()) { if (ProcessedSPNodes.count(SP) != 0) continue; SPCU->collectDeadVariables(SP); @@ -671,70 +661,71 @@ void DwarfDebug::endModule() { } // Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV, +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, DIVariable &Cleansed) { - LLVMContext &Ctx = DV->getContext(); // More than one inlined variable corresponds to one abstract variable. - // FIXME: This duplication of variables when inlining should probably be - // removed. It's done to allow each DIVariable to describe its location - // because the DebugLoc on the dbg.value/declare isn't accurate. We should - // make it accurate then remove this duplication/cleansing stuff. - Cleansed = cleanseInlinedVariable(DV, Ctx); + Cleansed = IV.first; auto I = AbstractVariables.find(Cleansed); if (I != AbstractVariables.end()) return I->second.get(); return nullptr; } -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { DIVariable Cleansed; - return getExistingAbstractVariable(DV, Cleansed); + return getExistingAbstractVariable(IV, Cleansed); } void DwarfDebug::createAbstractVariable(const DIVariable &Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this); + auto AbsDbgVariable = + make_unique<DbgVariable>(Var, nullptr, DIExpression(), this); InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } -void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV, +void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) + DIVariable Cleansed; + if (getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode)); + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + cast<MDLocalScope>(ScopeNode))); } -void -DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, - const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( + InlinedVariable IV, const MDNode *ScopeNode) { + DIVariable Cleansed; + if (getExistingAbstractVariable(IV, Cleansed)) return; - if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode)) + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<MDLocalScope>(ScopeNode))) createAbstractVariable(Cleansed, Scope); } // Collect variable information from side table maintained by MMI.
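The InlinedVariable pair that replaces the old cleanseInlinedVariable() machinery above is easy to state on its own. A minimal sketch of the keying scheme (the type names here are stand-ins; the real typedef comes from DbgValueHistoryMap):

  #include <utility>

  struct Variable {};  // stands in for the variable metadata node
  struct Location {};  // stands in for the inlined-at MDLocation

  // One source variable inlined at two call sites yields two distinct
  // concrete variables, but both share one abstract variable: the abstract
  // table is keyed by the variable alone, with the inlined-at part dropped.
  using InlinedVariable = std::pair<const Variable *, const Location *>;

  const Variable *abstractKey(InlinedVariable IV) {
    return IV.first; // the "Cleansed" variable
  }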
void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSetImpl<const MDNode *> &Processed) { + DenseSet<InlinedVariable> &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; - Processed.insert(VI.Var); + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; - DIVariable DV(VI.Var); - DIExpression Expr(VI.Expr); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - auto RegVar = make_unique<DbgVariable>(DV, Expr, this, VI.Slot); + DIExpression Expr = cast_or_null<MDExpression>(VI.Expr); + ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + auto RegVar = + make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot); if (InfoHolder.addScopeVariable(Scope, RegVar.get())) ConcreteVariables.push_back(std::move(RegVar)); } @@ -768,12 +759,12 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { /// Determine whether two variable pieces overlap. static bool piecesOverlap(DIExpression P1, DIExpression P2) { - if (!P1.isBitPiece() || !P2.isBitPiece()) + if (!P1->isBitPiece() || !P2->isBitPiece()) return true; - unsigned l1 = P1.getBitPieceOffset(); - unsigned l2 = P2.getBitPieceOffset(); - unsigned r1 = l1 + P1.getBitPieceSize(); - unsigned r2 = l2 + P2.getBitPieceSize(); + unsigned l1 = P1->getBitPieceOffset(); + unsigned l2 = P2->getBitPieceOffset(); + unsigned r1 = l1 + P1->getBitPieceSize(); + unsigned r2 = l2 + P2->getBitPieceSize(); // True where [l1,r1[ and [l2,r2[ overlap. return (l1 < r2) && (l2 < r1); } @@ -845,7 +836,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, bool couldMerge = false; // If this is a piece, it may belong to the current DebugLocEntry. - if (DIExpr.isBitPiece()) { + if (DIExpr->isBitPiece()) { // Add this value to the list of open ranges. OpenRanges.push_back(Value); @@ -884,35 +875,34 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, // Find variables for each lexical scope. -void -DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &Processed) { +void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); for (const auto &I : DbgValues) { - DIVariable DV(I.first); - if (Processed.count(DV)) + InlinedVariable IV = I.first; + if (Processed.count(IV)) continue; - // Instruction ranges, specifying where DV is accessible. + // Instruction ranges, specifying where IV is accessible. const auto &Ranges = I.second; if (Ranges.empty()) continue; LexicalScope *Scope = nullptr; - if (MDNode *IA = DV.getInlinedAt()) - Scope = LScopes.findInlinedScope(DV.getContext(), IA); + if (const MDLocation *IA = IV.second) + Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); else - Scope = LScopes.findLexicalScope(DV.getContext()); + Scope = LScopes.findLexicalScope(IV.first->getScope()); // If variable scope is not found then skip this variable.
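piecesOverlap above is the standard half-open interval test; a worked instance with arbitrary bit ranges:

  // Pieces are half-open bit ranges [offset, offset + size).
  // P1 = [0,32), P2 = [16,48):  (l1 < r2) && (l2 < r1)
  //                          -> (0 < 48) && (16 < 32) -> true: overlap.
  // P1 = [0,32), P2 = [32,64): (0 < 64) && (32 < 32) -> false: merely adjacent.
  static bool bitRangesOverlap(unsigned l1, unsigned r1,
                               unsigned l2, unsigned r2) {
    return (l1 < r2) && (l2 < r1);
  }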
if (!Scope) continue; - Processed.insert(DV); + Processed.insert(IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); InfoHolder.addScopeVariable(Scope, RegVar); @@ -937,16 +927,15 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, } // Collect info for variables that were optimized out. - DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV.isVariable()); - if (!Processed.insert(DV).second) + for (DIVariable DV : SP->getVariables()) { + if (!Processed.insert(InlinedVariable(DV, nullptr)).second) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) { + ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr), + Scope->getScopeNode()); DIExpression NoExpr; - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this)); + ConcreteVariables.push_back( + make_unique<DbgVariable>(DV, nullptr, NoExpr, this)); InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } @@ -972,7 +961,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc) { - if (!DL.isUnknown()) { + if (DL) { unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { @@ -984,7 +973,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) Flags |= DWARF2_FLAG_IS_STMT; - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else if (UnknownLocations) { PrevInstLoc = DL; @@ -1072,7 +1061,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - !MI.getDebugLoc().isUnknown()) { + MI.getDebugLoc()) { // Did the target forget to set the FrameSetup flag for CFI insns? assert(!MI.isCFIInstruction() && "First non-frame-setup instruction is a CFI instruction."); @@ -1137,11 +1126,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. - DIVariable DIVar(Ranges.front().first->getDebugVariable()); - if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { + DIVariable DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression().isBitPiece()) { + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { // Mark all non-overlapping initial pieces. 
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { DIExpression Piece = I->first->getDebugExpression(); @@ -1168,15 +1157,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); - + if (MDLocation *L = PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + auto *SP = L->getInlinedAtScope()->getSubprogram(); + recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1199,10 +1184,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DISubprogram SP(FnScope->getScopeNode()); + DISubprogram SP = cast<MDSubprogram>(FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *SPMap.lookup(SP); - SmallPtrSet<const MDNode *, 16> ProcessedVars; + DenseSet<InlinedVariable> ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. @@ -1210,7 +1195,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. - if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); @@ -1229,16 +1214,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - DISubprogram SP(AScope->getScopeNode()); - assert(SP.isSubprogram()); + DISubprogram SP = cast<MDSubprogram>(AScope->getScopeNode()); // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV).second) + for (DIVariable DV : SP->getVariables()) { + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(DV, DV.getContext()); + ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } @@ -1270,12 +1252,11 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; - if (DIScope Scope = DIScope(S)) { - assert(Scope.isScope()); - Fn = Scope.getFilename(); - Dir = Scope.getDirectory(); - if (Scope.isLexicalBlockFile()) - Discriminator = DILexicalBlockFile(S).getDiscriminator(); + if (auto *Scope = cast_or_null<MDScope>(S)) { + Fn = Scope->getFilename(); + Dir = Scope->getDirectory(); + if (auto *LBF = dyn_cast<MDLexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) @@ -1499,23 +1480,25 @@ static void emitDebugLocValue(const AsmPrinter &AP, Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap)); - if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + MDType *T = DV->getType().resolve(TypeIdentifierMap); + auto *B = dyn_cast<MDBasicType>(T); + if (B && (B->getEncoding() == dwarf::DW_ATE_signed || + B->getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); else DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); DIExpression Expr = Value.getExpression(); - if (!Expr || (Expr.getNumElements() == 0)) + if (!Expr || !Expr->getNumElements()) // Regular entry. AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. if (Loc.getOffset()) { DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr.begin(), Expr.end(), PieceOffsetInBits); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), + PieceOffsetInBits); } else DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), PieceOffsetInBits); @@ -1542,8 +1525,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, unsigned Offset = 0; for (auto Piece : Values) { DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getBitPieceOffset(); - unsigned PieceSize = Expr.getBitPieceSize(); + unsigned PieceOffset = Expr->getBitPieceOffset(); + unsigned PieceSize = Expr->getBitPieceSize(); assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); if (Offset < PieceOffset) { // The DWARF spec seriously mandates pieces with no locations for gaps. 
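The "pieces with no locations for gaps" rule above can be made concrete: a DW_OP_bit_piece that is not preceded by a location operation describes bits that have no location at all. A sketch of the bookkeeping, mirroring the Offset/PieceOffset arithmetic in finalize (the printf calls stand in for real emission):

  #include <cstdio>

  struct Piece { unsigned OffsetInBits, SizeInBits; };

  // Walk sorted, non-overlapping pieces; whenever coverage falls behind the
  // next piece's offset, emit an "empty" piece spanning the hole.
  void describePieces(const Piece *P, unsigned N) {
    unsigned Offset = 0;
    for (unsigned i = 0; i != N; ++i) {
      if (Offset < P[i].OffsetInBits)
        std::printf("DW_OP_bit_piece %u  (gap: no location)\n",
                    P[i].OffsetInBits - Offset);
      std::printf("<location> DW_OP_bit_piece %u\n", P[i].SizeInBits);
      Offset = P[i].OffsetInBits + P[i].SizeInBits;
    }
  }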
@@ -1554,15 +1537,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, Offset += PieceOffset-Offset; } Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap); - assert(PieceSize+PieceOffset <= VarSize - && "piece is larger than or outside of variable"); - assert(PieceSize != VarSize - && "piece covers entire variable"); -#endif + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset); } } else { @@ -1850,7 +1825,7 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + U.getCUNode()->getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -1914,7 +1889,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; if (SingleCU) - SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); return &SplitTypeUnitFileTable; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 74db3ef..e067f41 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -21,6 +21,7 @@ #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -74,7 +75,8 @@ public: /// - Variables that are described by multiple MMI table entries have multiple /// expressions and frame indices. class DbgVariable { - DIVariable Var; /// Variable Descriptor. + DIVariable Var; /// Variable Descriptor. + DILocation IA; /// Inlined at location. SmallVector<DIExpression, 1> Expr; /// Complex address location expression. DIE *TheDIE; /// Variable DIE. unsigned DotDebugLocOffset; /// Offset in DotDebugLocEntries. @@ -84,11 +86,11 @@ class DbgVariable { public: /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD, int FI = ~0) - : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), - MInsn(nullptr), DD(DD) { + DbgVariable(DIVariable V, DILocation IA, DIExpression E, DwarfDebug *DD, + int FI = ~0) + : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), + MInsn(nullptr), DD(DD) { FrameIndex.push_back(FI); - assert(Var.Verify()); assert(!E || E->isValid()); } @@ -96,6 +98,7 @@ public: /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) : Var(DbgValue->getDebugVariable()), + IA(DbgValue->getDebugLoc()->getInlinedAt()), Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), DD(DD) { FrameIndex.push_back(~0); @@ -103,12 +106,13 @@ public: // Accessors. 
DIVariable getVariable() const { return Var; } + DILocation getInlinedAt() const { return IA; } const ArrayRef<DIExpression> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } + StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } const ArrayRef<int> getFrameIndex() const { return FrameIndex; } @@ -116,6 +120,7 @@ public: assert( DotDebugLocOffset == ~0U && !MInsn && "not an MMI entry"); assert(V.DotDebugLocOffset == ~0U && !V.MInsn && "not an MMI entry"); assert(V.Var == Var && "conflicting DIVariable"); + assert(V.IA == IA && "conflicting inlined-at location"); if (V.getFrameIndex().back() != ~0) { auto E = V.getExpression(); @@ -124,40 +129,40 @@ public: FrameIndex.append(FI.begin(), FI.end()); } assert(Expr.size() > 1 - ? std::all_of(Expr.begin(), Expr.end(), - [](DIExpression &E) { return E.isBitPiece(); }) - : (true && "conflicting locations for variable")); + ? std::all_of(Expr.begin(), Expr.end(), + [](DIExpression &E) { return E->isBitPiece(); }) + : (true && "conflicting locations for variable")); } // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { - if (Var.getTag() == dwarf::DW_TAG_arg_variable) + if (Var->getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. bool isArtificial() const { - if (Var.isArtificial()) + if (Var->isArtificial()) return true; - if (getType().isArtificial()) + if (getType()->isArtificial()) return true; return false; } bool isObjectPointer() const { - if (Var.isObjectPointer()) + if (Var->isObjectPointer()) return true; - if (getType().isObjectPointer()) + if (getType()->isObjectPointer()) return true; return false; } bool variableHasComplexAddress() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); + assert(Var && "Invalid complex DbgVariable!"); assert(Expr.size() == 1 && "variableHasComplexAddress() invoked on multi-FI variable"); - return Expr.back().getNumElements() > 0; + return Expr.back()->getNumElements() > 0; } bool isBlockByrefVariable() const; DIType getType() const; @@ -165,7 +170,7 @@ public: private: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const; + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const; }; @@ -324,14 +329,16 @@ class DwarfDebug : public AsmPrinterHandler { return InfoHolder.getUnits(); } + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + /// \brief Find abstract variable associated with Var. 
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV, + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, DIVariable &Cleansed); - DbgVariable *getExistingAbstractVariable(const DIVariable &DV); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(const DIVariable &Var, + void ensureAbstractVariableIsCreated(InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, const MDNode *Scope); /// \brief Construct a DIE for this abstract scope. @@ -461,7 +468,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &ProcessedVars); + DenseSet<InlinedVariable> &ProcessedVars); /// \brief Build the location list for all DBG_VALUEs in the /// function that describe the same variable. @@ -470,7 +477,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P); + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { @@ -567,7 +574,7 @@ public: void emitDebugLocEntryLocation(const DebugLocEntry &Entry); /// Find the MDNode for the given reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 6eaf707..a4fd36f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -48,7 +48,7 @@ public: // Main entry points. // DwarfCFIException(AsmPrinter *A); - virtual ~DwarfCFIException(); + ~DwarfCFIException() override; /// Emit all exception information that should come after the content. void endModule() override; @@ -70,7 +70,7 @@ public: // Main entry points. // ARMException(AsmPrinter *A); - virtual ~ARMException(); + ~ARMException() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 489e455..e576c93 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -195,27 +195,27 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits, bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { - auto I = Expr.begin(); - auto E = Expr.end(); + auto I = Expr->expr_op_begin(); + auto E = Expr->expr_op_end(); if (I == E) return AddMachineRegPiece(MachineReg); // Pattern-match combinations for which more efficient representations exist // first. bool ValidReg = false; - switch (*I) { + switch (I->getOp()) { case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(1); - unsigned SizeInBits = I->getArg(2); + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); // Piece always comes at the end of the expression. 
return AddMachineRegPiece(MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. - auto N = I->getNext(); - if ((N != E) && (*N == dwarf::DW_OP_deref)) { - unsigned Offset = I->getArg(1); + auto N = I.getNext(); + if (N != E && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(0); ValidReg = AddMachineRegIndirect(MachineReg, Offset); std::advance(I, 2); break; @@ -240,20 +240,20 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return true; } -void DwarfExpression::AddExpression(DIExpression::iterator I, - DIExpression::iterator E, +void DwarfExpression::AddExpression(MDExpression::expr_op_iterator I, + MDExpression::expr_op_iterator E, unsigned PieceOffsetInBits) { for (; I != E; ++I) { - switch (*I) { + switch (I->getOp()) { case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(1); - unsigned SizeInBits = I->getArg(2); + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(I->getArg(1)); + EmitUnsigned(I->getArg(0)); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 985d52c..a8b65f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -97,7 +97,8 @@ public: /// Emit a the operations remaining the DIExpressionIterator I. /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression::iterator I, DIExpression::iterator E, + void AddExpression(MDExpression::expr_op_iterator I, + MDExpression::expr_op_iterator E, unsigned PieceOffsetInBits = 0); }; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 60acc58e..32adb40 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -139,7 +139,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); // Variables with positive arg numbers are parameters. - if (unsigned ArgNum = DV.getArgNumber()) { + if (unsigned ArgNum = DV->getArg()) { // Keep all parameters in order at the start of the variable list to ensure // function types are correct (no out-of-order parameters) // @@ -149,7 +149,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // rather than linear search. auto I = Vars.begin(); while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); + unsigned CurNum = (*I)->getVariable()->getArg(); // A local (non-parameter) variable has been found, insert immediately // before it. if (CurNum == 0) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index f6af73f..9154652 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -175,8 +175,8 @@ static bool isShareableAcrossCUs(DIDescriptor D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. 
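The pattern match at the top of AddMachineRegExpression above folds a register, a constant offset, and a dereference into one breg-style operation. A toy version of that peephole (the op encodings and containers are stand-ins, not the MDExpression iterator API):

  #include <cstdio>
  #include <vector>

  enum Op { OP_PLUS, OP_DEREF, OP_BIT_PIECE };
  struct ExprOp { Op Kind; unsigned Arg; };

  // [Reg, {OP_PLUS Off}, {OP_DEREF}] --> "DW_OP_breg<Reg> Off": the value
  // lives in memory at Reg + Off, expressed as one op instead of three.
  void lowerRegExpression(unsigned Reg, const std::vector<ExprOp> &E) {
    if (E.size() >= 2 && E[0].Kind == OP_PLUS && E[1].Kind == OP_DEREF) {
      std::printf("DW_OP_breg%u %u\n", Reg, E[0].Arg);
      return;
    }
    std::printf("DW_OP_reg%u ...\n", Reg); // fallback: no folding applies
  }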
- return (D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + return (isa<MDType>(D) || + (isa<MDSubprogram>(D) && !cast<MDSubprogram>(D)->isDefinition())) && !GenerateDwarfTypeUnits; } @@ -397,52 +397,48 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) { - assert(V.isVariable()); + assert(V); - addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), - V.getContext().getDirectory()); + addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), + V->getScope()->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) { - assert(G.isGlobalVariable()); + assert(G); - addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); + addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) { - assert(SP.isSubprogram()); + assert(SP); - addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) { - assert(Ty.isType()); + assert(Ty); - addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { - assert(Ty.isObjCProperty()); + assert(Ty); - DIFile File = Ty.getFile(); - addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), - File.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { - assert(NS.Verify()); - - addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); + addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } /// addRegisterOp - Add register operand. @@ -525,28 +521,26 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, const MachineLocation &Location) { DIType Ty = DV.getType(); DIType TmpTy = Ty; - uint16_t Tag = Ty.getTag(); + uint16_t Tag = Ty->getTag(); bool isPointer = false; StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy(Ty); - TmpTy = resolve(DTy.getTypeDerivedFrom()); + DIDerivedType DTy = cast<MDDerivedType>(Ty); + TmpTy = resolve(DTy->getBaseType()); isPointer = true; } - DICompositeType blockStruct(TmpTy); - // Find the __forwarding field and the variable field in the __Block_byref // struct. 
- DIArray Fields = blockStruct.getElements(); + DIArray Fields = cast<MDCompositeTypeBase>(TmpTy)->getElements(); DIDerivedType varField; DIDerivedType forwardingField; - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDerivedType DT(Fields.getElement(i)); - StringRef fieldName = DT.getName(); + for (unsigned i = 0, N = Fields.size(); i < N; ++i) { + DIDerivedType DT = cast<MDDerivedTypeBase>(Fields[i]); + StringRef fieldName = DT->getName(); if (fieldName == "__forwarding") forwardingField = DT; else if (fieldName == varName) @@ -554,8 +548,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; - unsigned varFieldOffset = varField.getOffsetInBits() >> 2; + unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; + unsigned varFieldOffset = varField->getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -601,9 +595,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) { - dwarf::Tag T = (dwarf::Tag)Ty.getTag(); + if (DIDerivedType DTy = dyn_cast<MDDerivedTypeBase>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. // (Pieces of) aggregate types that get hacked apart by SROA may also be @@ -624,56 +617,55 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_enumeration_type); - if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + if (DITypeRef Deriv = DTy->getBaseType()) return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness // here, leading to incorrectly emitted constants. - assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type); return false; } - DIBasicType BTy(Ty); - assert(BTy.isBasicType()); - unsigned Encoding = BTy.getEncoding(); + DIBasicType BTy = cast<MDBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); assert((Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_float || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - (Ty.getTag() == dwarf::DW_TAG_unspecified_type && - Ty.getName() == "decltype(nullptr)")) && + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); - return (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - Ty.getTag() == dwarf::DW_TAG_unspecified_type); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; } /// If this type is derived from a base type then return base type size. 
static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { - unsigned Tag = Ty.getTag(); + unsigned Tag = Ty->getTag(); if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) - return Ty.getSizeInBits(); + return Ty->getSizeInBits(); - DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + auto *BaseType = DD->resolve(Ty->getBaseType()); - assert(BaseType.isValid() && "Unexpected invalid base type"); + assert(BaseType && "Unexpected invalid base type"); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need // of this since they're a different type of qualification on the type. - if (BaseType.getTag() == dwarf::DW_TAG_reference_type || - BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) - return Ty.getSizeInBits(); + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); - if (BaseType.isDerivedType()) - return getBaseTypeSize(DD, DIDerivedType(BaseType)); + if (auto *DT = dyn_cast<MDDerivedTypeBase>(BaseType)) + return getBaseTypeSize(DD, DT); - return BaseType.getSizeInBits(); + return BaseType->getSizeInBits(); } /// addConstantFPValue - Add constant value entry in variable DIE. @@ -771,39 +763,37 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { /// addTemplateParams - Add template parameters into buffer. void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. - for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - constructTemplateTypeParameterDIE(Buffer, - DITemplateTypeParameter(Element)); - else if (Element.isTemplateValueParameter()) - constructTemplateValueParameterDIE(Buffer, - DITemplateValueParameter(Element)); + for (unsigned i = 0, e = TParams.size(); i != e; ++i) { + DIDescriptor Element = TParams[i]; + if (auto *TTP = dyn_cast<MDTemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<MDTemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); } } /// getOrCreateContextDIE - Get context owner's DIE. DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { - if (!Context || Context.isFile()) + if (!Context || isa<MDFile>(Context)) return &getUnitDie(); - if (Context.isType()) - return getOrCreateTypeDIE(DIType(Context)); - if (Context.isNameSpace()) - return getOrCreateNameSpace(DINameSpace(Context)); - if (Context.isSubprogram()) - return getOrCreateSubprogramDIE(DISubprogram(Context)); + if (auto *T = dyn_cast<MDType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<MDNamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<MDSubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); return getDIE(Context); } DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { - DIScope Context = resolve(Ty.getContext()); + DIScope Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. 
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); constructTypeDIE(TyDIE, Ty); @@ -817,18 +807,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return nullptr; - DIType Ty(TyNode); - assert(Ty.isType()); - assert(Ty == resolve(Ty.getRef()) && + auto *Ty = cast<MDType>(TyNode); + assert(Ty == resolve(Ty->getRef()) && "type was not uniqued, possible ODR violation."); // DW_TAG_restrict_type is not supported in DWARF2 - if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) - return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE( + resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType()))); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIScope Context = resolve(Ty.getContext()); + DIScope Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); @@ -836,24 +826,22 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { return TyDIE; // Create new type. - DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) { - DICompositeType CTy(Ty); - if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) - if (MDString *TypeId = CTy.getIdentifier()) { + if (auto *BT = dyn_cast<MDBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (DICompositeType CTy = dyn_cast<MDCompositeTypeBase>(Ty)) { + if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (MDString *TypeId = CTy->getRawIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. return &TyDIE; } constructTypeDIE(TyDIE, CTy); } else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(TyDIE, DIDerivedType(Ty)); + constructTypeDIE(TyDIE, cast<MDDerivedType>(Ty)); } return &TyDIE; @@ -861,19 +849,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE) { - if (!Ty.getName().empty() && !Ty.isForwardDecl()) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { bool IsImplementation = 0; - if (Ty.isCompositeType()) { - DICompositeType CT(Ty); + if (auto *CT = dyn_cast<MDCompositeTypeBase>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(Ty.getName(), TyDIE, Flags); + DD->addAccelType(Ty->getName(), TyDIE, Flags); - if (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) + if (!Context || isa<MDCompileUnit>(Context) || isa<MDFile>(Context) || + isa<MDNamespace>(Context)) addGlobalType(Ty, TyDIE, Context); } } @@ -914,10 +901,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { std::string CS; SmallVector<DIScope, 1> Parents; - while (!Context.isCompileUnit()) { + while (!isa<MDCompileUnit>(Context)) { Parents.push_back(Context); - if (Context.getContext()) - Context = resolve(Context.getContext()); + if (Context->getScope()) + Context = resolve(Context->getScope()); else // Structure, etc types will have a NULL context if they're at the top // level. @@ -929,9 +916,9 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) { - DIScope Ctx = *I; - StringRef Name = Ctx.getName(); - if (Name.empty() && Ctx.isNameSpace()) + const MDScope *Ctx = *I; + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<MDNamespace>(Ctx)) Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; @@ -944,31 +931,31 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { /// constructTypeDIE - Construct basic type die from DIBasicType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. - StringRef Name = BTy.getName(); + StringRef Name = BTy->getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) return; addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + BTy->getEncoding()); - uint64_t Size = BTy.getSizeInBits() >> 3; + uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + DIType FromTy = resolve(DTy->getBaseType()); if (FromTy) addType(Buffer, FromTy); @@ -982,24 +969,25 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(Buffer, dwarf::DW_AT_containing_type, - *getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry( + Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(cast<MDDerivedType>(DTy)->getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) + if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); } /// constructSubprogramArguments - Construct function argument DIEs. 
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIType Ty = resolve(Args.getElement(i)); + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + DIType Ty = resolve(Args[i]); if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, Ty); - if (Ty.isArtificial()) + if (Ty->isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } @@ -1008,9 +996,9 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { /// constructTypeDIE - Construct type DIE from DICompositeType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. - StringRef Name = CTy.getName(); + StringRef Name = CTy->getName(); - uint64_t Size = CTy.getSizeInBits() >> 3; + uint64_t Size = CTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); switch (Tag) { @@ -1022,14 +1010,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { break; case dwarf::DW_TAG_subroutine_type: { // Add return type. A void return won't have a type. - DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); - DIType RTy(resolve(Elements.getElement(0))); - if (RTy) - addType(Buffer, RTy); + auto Elements = cast<MDSubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = resolve(Elements[0])) + addType(Buffer, RTy); bool isPrototyped = true; - if (Elements.getNumElements() == 2 && - !Elements.getElement(1)) + if (Elements.size() == 2 && !Elements[1]) isPrototyped = false; constructSubprogramArguments(Buffer, Elements); @@ -1042,60 +1029,46 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Language == dwarf::DW_LANG_ObjC)) addFlag(Buffer, dwarf::DW_AT_prototyped); - if (CTy.isLValueReference()) + if (CTy->isLValueReference()) addFlag(Buffer, dwarf::DW_AT_reference); - if (CTy.isRValueReference()) + if (CTy->isRValueReference()) addFlag(Buffer, dwarf::DW_AT_rvalue_reference); } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. 
- DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.isSubprogram()) - getOrCreateSubprogramDIE(DISubprogram(Element)); - else if (Element.isDerivedType()) { - DIDerivedType DDTy(Element); - if (DDTy.getTag() == dwarf::DW_TAG_friend) { + DIArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + DIDescriptor Element = Elements[i]; + if (!Element) + continue; + if (auto *SP = dyn_cast<MDSubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (DIDerivedType DDTy = dyn_cast<MDDerivedTypeBase>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); - addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), - dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) { + addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); } else { constructMemberDIE(Buffer, DDTy); } - } else if (Element.isObjCProperty()) { - DIObjCProperty Property(Element); - DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); - StringRef PropertyName = Property.getObjCPropertyName(); + } else if (DIObjCProperty Property = dyn_cast<MDObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - if (Property.getType()) - addType(ElemDie, Property.getType()); + if (Property->getType()) + addType(ElemDie, Property->getType()); addSourceLine(ElemDie, Property); - StringRef GetterName = Property.getObjCPropertyGetterName(); + StringRef GetterName = Property->getGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = Property.getObjCPropertySetterName(); + StringRef SetterName = Property->getSetterName(); if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (Property.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (Property.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (Property.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (Property.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (Property.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (Property.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) + if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); @@ -1104,28 +1077,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Entry = createDIEEntry(ElemDie); insertDIEEntry(Element, Entry); } - } else - continue; + } } - if (CTy.isAppleBlockExtension()) + if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. 
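A concrete case for the DW_AT_containing_type note above (illustrative source, not part of the patch): in the hierarchy below, the vtable holder resolved via getVTableHolder() is Base for both classes, so GDB expects each composite DIE to carry DW_AT_containing_type pointing at Base.

  struct Base {
    virtual ~Base(); // Base introduces the vtable
  };
  struct Derived : Base {
    int Member;
  };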
- DICompositeType ContainingType(resolve(CTy.getContainingType())); - if (ContainingType) + if (DICompositeType ContainingType = + dyn_cast_or_null<MDCompositeType>(resolve(CTy->getVTableHolder()))) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); - if (CTy.isObjcClassComplete()) + if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); + addTemplateParams(Buffer, CTy->getTemplateParams()); break; } @@ -1144,20 +1116,20 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // TODO: Do we care about size for enum forward declarations? if (Size) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); - else if (!CTy.isForwardDecl()) + else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. - if (CTy.isForwardDecl()) + if (CTy->isForwardDecl()) addFlag(Buffer, dwarf::DW_AT_declaration); // Add source line info if available. - if (!CTy.isForwardDecl()) + if (!CTy->isForwardDecl()) addSourceLine(Buffer, CTy); // No harm in adding the runtime language to the declaration. - unsigned RLang = CTy.getRunTimeLang(); + unsigned RLang = CTy->getRuntimeLang(); if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); @@ -1171,10 +1143,10 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, DIE &ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. - if (TP.getType()) - addType(ParamDIE, resolve(TP.getType())); - if (!TP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); + if (TP->getType()) + addType(ParamDIE, resolve(TP->getType())); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); } /// constructTemplateValueParameterDIE - Construct new DIE for the given @@ -1182,17 +1154,17 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, void DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, DITemplateValueParameter VP) { - DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); // Add the type if there is one, template template and template parameter // packs will not have a type. 
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) - addType(ParamDIE, resolve(VP.getType())); - if (!VP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Metadata *Val = VP.getValue()) { + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP->getType())); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, resolve(VP.getType())); + addConstantValue(ParamDIE, CI, resolve(VP->getType())); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) @@ -1202,14 +1174,12 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, // parameter, rather than a pointer to it. addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, Loc); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, cast<MDString>(Val)->getString()); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { - assert(isa<MDNode>(Val)); - DIArray A(cast<MDNode>(Val)); - addTemplateParams(ParamDIE, A); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); } } } @@ -1218,19 +1188,19 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); if (DIE *NDie = getDIE(NS)) return NDie; DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - StringRef Name = NS.getName(); + StringRef Name = NS->getName(); if (!Name.empty()) - addString(NDie, dwarf::DW_AT_name, NS.getName()); + addString(NDie, dwarf::DW_AT_name, NS->getName()); else Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); - addGlobalName(Name, NDie, NS.getContext()); + addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); return &NDie; } @@ -1241,12 +1211,12 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); if (DIE *SPDie = getDIE(SP)) return SPDie; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { if (!Minimal) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); @@ -1260,7 +1230,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Stop here and fill this in later, depending on whether or not this // subprogram turns out to have inlined instances or not. 
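// The "construct the context before querying for the existence of the DIE"
// comment above (repeated for namespaces and subprograms) encodes a
// re-entrance guard worth spelling out. A schematic restatement with
// hypothetical names, same shape as getOrCreateNameSpace:
//
//   DIE *getOrCreateFooDIE(Foo *N) {
//     // Building the context may recursively create N's DIE...
//     DIE *ContextDIE = getOrCreateContextDIE(N->getScope());
//     // ...so the existence check must come *after* it.
//     if (DIE *Existing = getDIE(N))
//       return Existing;
//     return &createAndAddDIE(N->getTag(), *ContextDIE, N);
//   }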
- if (SP.isDefinition()) + if (SP->isDefinition()) return &SPDie; applySubprogramAttributes(SP, SPDie); @@ -1271,19 +1241,19 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { DeclDie = getDIE(SPDecl); assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl.getLinkageName(); + DeclLinkageName = SPDecl->getLinkageName(); } // Add function template parameters. - addTemplateParams(SPDie, SP.getTemplateParams()); + addTemplateParams(SPDie, SP->getTemplateParams()); // Add the linkage name if we have one and it isn't in the Decl. - StringRef LinkageName = SP.getLinkageName(); + StringRef LinkageName = SP->getLinkageName(); assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); @@ -1306,8 +1276,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, return; // Constructors and operators for anonymous aggregates do not have names. - if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, SP.getName()); + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); // Skip the rest of the attributes under -gmlt to save space. if (Minimal) @@ -1318,33 +1288,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, // Add the prototype if we have a prototype and we have a C like // language. uint16_t Language = getLanguage(); - if (SP.isPrototyped() && + if (SP->isPrototyped() && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DISubroutineType SPTy = SP.getType(); - assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + DISubroutineType SPTy = SP->getType(); + assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DITypeArray Args = SPTy.getTypeArray(); + auto Args = SPTy->getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. - if (resolve(Args.getElement(0))) - addType(SPDie, DIType(resolve(Args.getElement(0)))); + if (Args.size()) + if (auto Ty = resolve(Args[0])) + addType(SPDie, Ty); - unsigned VK = SP.getVirtuality(); + unsigned VK = SP->getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIELoc *Block = getDIELoc(); addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); ContainingTypeMap.insert( - std::make_pair(&SPDie, resolve(SP.getContainingType()))); + std::make_pair(&SPDie, resolve(SP->getContainingType()))); } - if (!SP.isDefinition()) { + if (!SP->isDefinition()) { addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. 
They will @@ -1352,35 +1323,35 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } - if (SP.isArtificial()) + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); - if (!SP.isLocalToUnit()) + if (!SP->isLocalToUnit()) addFlag(SPDie, dwarf::DW_AT_external); - if (SP.isOptimized()) + if (SP->isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - if (SP.isLValueReference()) + if (SP->isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); - if (SP.isRValueReference()) + if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); - if (SP.isProtected()) + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) + else if (SP->isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (SP.isPublic()) + else if (SP->isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (SP.isExplicit()) + if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); } @@ -1393,9 +1364,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. - int64_t LowerBound = SR.getLo(); + int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = SR.getCount(); + int64_t Count = SR->getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); @@ -1420,11 +1391,11 @@ DIE *DwarfUnit::getIndexTyDie() { /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { - if (CTy.isVector()) + if (CTy->isVector()) addFlag(Buffer, dwarf::DW_AT_GNU_vector); // Emit the element type. - addType(Buffer, resolve(CTy.getTypeDerivedFrom())); + addType(Buffer, resolve(CTy->getBaseType())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1432,31 +1403,32 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *IdxTy = getIndexTyDie(); // Add subranges to array type. - DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + DIArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DebugNode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<MDSubrange>(Element), IdxTy); } } /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getElements(); + DIArray Elements = CTy->getElements(); // Add enumerators to enumeration type. 
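// Worked example for the subrange logic above (illustrative, not verbatim
// output of this patch): for C's 'int a[10]' the default lower bound is 0,
// so only a count is emitted:
//
//   DW_TAG_array_type
//     DW_AT_type            -> "int"
//     DW_TAG_subrange_type
//       DW_AT_count         10
//
// An unbounded 'extern int b[];' has Count == -1, so neither
// DW_AT_lower_bound nor DW_AT_count is emitted, per the comment in
// constructSubrangeDIE.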
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIEnumerator Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<MDEnumerator>(Elements[i]); + if (Enum) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); - StringRef Name = Enum.getName(); + StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = Enum.getEnumValue(); + int64_t Value = Enum->getValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); } } - DIType DTy = resolve(CTy.getTypeDerivedFrom()); + DIType DTy = resolve(CTy->getBaseType()); if (DTy) { addType(Buffer, DTy); addFlag(Buffer, dwarf::DW_AT_enum_class); @@ -1481,17 +1453,20 @@ void DwarfUnit::constructContainingTypeDIEs() { } /// constructMemberDIE - Construct member DIE from DIDerivedType. -void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); - StringRef Name = DT.getName(); +void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT_) { + // Downcast to MDDerivedType. + const MDDerivedType *DT = cast<MDDerivedType>(DT_); + + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT.getTypeDerivedFrom())); + addType(MemberDie, resolve(DT->getBaseType())); addSourceLine(MemberDie, DT); - if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. @@ -1501,14 +1476,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { - uint64_t Size = DT.getSizeInBits(); + uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; @@ -1517,8 +1492,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t Offset = DT->getOffsetInBits(); + uint64_t AlignMask = ~(DT->getAlignInBits() - 1); uint64_t HiMark = (Offset + FieldSize) & AlignMask; uint64_t FieldOffset = (HiMark - FieldSize); Offset -= FieldOffset; @@ -1533,7 +1508,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes = FieldOffset >> 3; } else // This is not a bitfield. 
- OffsetInBytes = DT.getOffsetInBits() >> 3; + OffsetInBytes = DT->getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); @@ -1545,49 +1520,50 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes); } - if (DT.isProtected()) + if (DT->isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (DT.isVirtual()) + if (DT->isVirtual()) addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. - if (MDNode *PNode = DT.getObjCProperty()) + if (MDNode *PNode = DT->getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, PropertyDie); - if (DT.isArtificial()) + if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); } /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. -DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { - if (!DT.Verify()) +DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT_) { + const MDDerivedType *DT = cast_or_null<MDDerivedType>(DT_); + if (!DT) return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); - DIType Ty = resolve(DT.getTypeDerivedFrom()); + DIType Ty = resolve(DT->getBaseType()); - addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); addType(StaticMemberDIE, Ty); addSourceLine(StaticMemberDIE, DT); addFlag(StaticMemberDIE, dwarf::DW_AT_external); @@ -1595,19 +1571,19 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { // FIXME: We could omit private if the parent is a class_type, and // public if the parent is something else. 
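// Context for the FIXME above: when no accessibility attribute is present,
// DWARF consumers assume DW_ACCESS_private for members of a
// DW_TAG_class_type and DW_ACCESS_public for members of a structure or
// union, e.g.
//
//   class C { int x; };           // 'x' would need no DW_AT_accessibility
//   class D { public: int y; };   // 'y' would need DW_ACCESS_public
//
// which is why the explicit attribute could be omitted in the default cases.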
- if (DT.isProtected()) + if (DT->isProtected()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) addConstantValue(StaticMemberDIE, CI, Ty); - if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); return &StaticMemberDIE; diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 81c5821..b354255 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -141,7 +141,7 @@ public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return CUNode.getLanguage(); } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } DICompileUnit getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } @@ -342,7 +342,7 @@ protected: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const { return DD->resolve(Ref); } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 14df4c9..6f64d8f 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -188,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// Compute the call-site table. The entry for an invoke has a try-range -/// containing the call, a non-zero landing pad, and an appropriate action. The -/// entry for an ordinary call has a try-range containing the call and zero for -/// the landing pad and the action. Calls marked 'nounwind' have no entry and -/// must not be contained in the try-range of any entry - they form gaps in the -/// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate // try-ranges for them need be deduced so we can put them in the LSDA. - RangeMapType PadMap; for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { const LandingPadInfo *LandingPad = LandingPads[i]; for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { @@ -211,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, PadMap[BeginLabel] = P; } } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. 
The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 94d0585..65973fa 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -80,13 +80,15 @@ protected: /// `false' otherwise. bool callToNoUnwindFunction(const MachineInstr *MI); + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + /// Compute the call-site table. The entry for an invoke has a try-range /// containing the call, a non-zero landing pad and an appropriate action. /// The entry for an ordinary call has a try-range containing the call and /// zero for the landing pad and the action. Calls marked 'nounwind' have /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. - void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); @@ -123,7 +125,7 @@ protected: public: EHStreamer(AsmPrinter *A); - virtual ~EHStreamer(); + ~EHStreamer() override; // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 7d76ead..f89d364 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -67,6 +68,27 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If this was an outlined handler, we need to define the label corresponding + // to the offset of the parent frame relative to the stack pointer after the + // prologue. + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + if (F != ParentF) { + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { + MCSymbol *HandlerTypeParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(F->getName())); + + // Emit a symbol assignment. + Asm->OutStreamer.EmitAssignment( + HandlerTypeParentFrameOffset, + MCConstantExpr::Create(I->second, Asm->OutContext)); + } + } + if (!shouldEmitPersonality && !shouldEmitMoves) return; @@ -82,12 +104,17 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function exception information. 
/// -void Win64Exception::endFunction(const MachineFunction *) { +void Win64Exception::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + EHPersonality Per = MMI->getPersonalityType(); + + // Get rid of any dead landing pads if we're not using a Windows EH scheme. In + // Windows EH schemes, the landing pad is not actually reachable. It only + // exists so that we can emit the right table data. + if (!isMSVCEHPersonality(Per)) + MMI->TidyLandingPads(); if (shouldEmitPersonality) { Asm->OutStreamer.PushSection(); @@ -97,9 +124,10 @@ void Win64Exception::endFunction(const MachineFunction *) { // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. - EHPersonality Per = MMI->getPersonalityType(); if (Per == EHPersonality::MSVC_Win64SEH) emitCSpecificHandlerTable(); + else if (Per == EHPersonality::MSVC_CXX) + emitCXXFrameHandler3Table(MF); else emitExceptionTable(); @@ -108,11 +136,19 @@ void Win64Exception::endFunction(const MachineFunction *) { Asm->OutStreamer.EmitWinCFIEndProc(); } -const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { +const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::Create(0, Asm->OutContext); return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, Asm->OutContext); } +const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::Create(0, Asm->OutContext); + return createImageRel32(Asm->getSymbol(GV)); +} + /// Emit the language-specific data that __C_specific_handler expects. This /// handler lives in the x64 Microsoft C runtime and allows catching or cleaning /// up after faults with __try, __except, and __finally. The typeinfo values @@ -237,3 +273,231 @@ void Win64Exception::emitCSpecificHandlerTable() { } } } + +void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + auto &OS = Asm->OutStreamer; + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + + StringRef ParentLinkageName = + GlobalValue::getRealLinkageName(ParentF->getName()); + + MCSymbol *FuncInfoXData = + Asm->OutContext.GetOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName)); + OS.EmitValue(createImageRel32(FuncInfoXData), 4); + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. + MCSymbol *LastLabel = Asm->getFunctionBegin(); + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + int LastEHState = -2; + + // The parent function and the catch handlers contribute to the 'ip2state' + // table. 
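// The two createImageRel32 overloads above establish a small convention that
// the table emission below relies on: a null symbol or global yields a
// literal 0 instead of an image-relative relocation, encoding "absent"
// optional fields (e.g. an empty UnwindMap). Restated as a sketch
// ('imageRel32OrZero' is a hypothetical free-function form; 'Ctx' is
// supplied by the caller):
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

static const MCExpr *imageRel32OrZero(const MCSymbol *Sym, MCContext &Ctx) {
  if (!Sym)
    return MCConstantExpr::Create(0, Ctx);           // absent field -> 0
  return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx);
}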
+ for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + if (SawPotentiallyThrowing) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + SawPotentiallyThrowing = false; + LastEHState = -1; + } + + if (LandingPad->WinEHState != LastEHState) + FuncInfo.IPToStateList.push_back( + std::make_pair(BeginLabel, LandingPad->WinEHState)); + LastEHState = LandingPad->WinEHState; + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + } + } + + // Defer emission until we've visited the parent function and all the catch + // handlers. Cleanups don't contribute to the ip2state table yet, so don't + // count them. + if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) + return; + ++FuncInfo.NumIPToStateFuncsVisited; + if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) + return; + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.UnwindMap.empty()) + UnwindMapXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$stateUnwindMap$", ParentLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$tryMap$", ParentLinkageName)); + if (!FuncInfo.IPToStateList.empty()) + IPToStateXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$ip2state$", ParentLinkageName)); + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; + // IPToStateMapEntry *IPToStateMap; + // uint32_t UnwindHelp; // (x64/ARM only) + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
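// For orientation, a hypothetical instance of the header emitted just below,
// for a function 'foo' with a two-state unwind map, one try block and two
// ip2state entries (field order as in the code; '@IMGREL' is shorthand for
// an image-relative fixup):
//
//   $cppxdata$foo:
//     .long 0x19930522              // MagicNumber
//     .long 2                       // MaxState = UnwindMap.size()
//     .long $stateUnwindMap$foo@IMGREL
//     .long 1                       // NumTryBlocks
//     .long $tryMap$foo@IMGREL
//     .long 2                       // IPMapEntries
//     .long $ip2state$foo@IMGREL
//     .long <UnwindHelp offset>     // UnwindHelp (x64)
//     .long 0                       // ESTypeList (unused here)
//     .long 1                       // EHFlags: synchronous exceptions only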
+ OS.EmitLabel(FuncInfoXData); + OS.EmitIntValue(0x19930522, 4); // MagicNumber + OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState + OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks + OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap + OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries + OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap + OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp + OS.EmitIntValue(0, 4); // ESTypeList + OS.EmitIntValue(1, 4); // EHFlags + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { + OS.EmitIntValue(UME.ToState, 4); // ToState + OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; + + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.GetOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(ParentLinkageName)); + + HandlerMaps.push_back(HandlerMapXData); + + int CatchHigh = -1; + for (WinEHHandlerType &HT : TBME.HandlerArray) + CatchHigh = + std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + + assert(TBME.TryLow <= TBME.TryHigh); + assert(CatchHigh > TBME.TryHigh); + OS.EmitIntValue(TBME.TryLow, 4); // TryLow + OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh + OS.EmitIntValue(CatchHigh, 4); // CatchHigh + OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches + OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create( + ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + + // Get the frame escape label with the offset of the catch object. If + // the index is -1, then there is no catch object, and we should emit an + // offset of zero, indicating that no copy will occur. 
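// Background for the catch-object recovery just below: the catch object
// lives in the parent's frame, and the outlined handler finds it through a
// frame-escape index. In IR of this vintage the pairing looks roughly like
// the following (a hedged sketch, not lifted from the patch; these
// intrinsics were later renamed localescape/localrecover):
//
//   ; parent publishes the catch object slot as escape index 0
//   call void (...) @llvm.frameescape(i32* %catch.obj)
//   ; outlined handler recovers it from the parent's frame pointer
//   %obj = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*),
//                                      i8* %parent.fp, i32 0)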
+ const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObjRecoverIdx >= 0) { + MCSymbol *FrameAllocOffset = + Asm->OutContext.getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(ParentF->getName()), + HT.CatchObjRecoverIdx); + FrameAllocOffsetRef = MCSymbolRefExpr::Create( + FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext); + } + + OS.EmitIntValue(HT.Adjectives, 4); // Adjectives + OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type + OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset + OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : FuncInfo.IPToStateList) { + OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP + OS.EmitIntValue(IPStatePair.second, 4); // State + } + } +} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index b2d5d1b..5f4237f 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -17,7 +17,9 @@ #include "EHStreamer.h" namespace llvm { +class GlobalValue; class MachineFunction; +class MCExpr; class Win64Exception : public EHStreamer { /// Per-function flag to indicate if personality info should be emitted. @@ -31,14 +33,17 @@ class Win64Exception : public EHStreamer { void emitCSpecificHandlerTable(); - const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + const MCExpr *createImageRel32(const MCSymbol *Value); + const MCExpr *createImageRel32(const GlobalValue *GV); public: //===--------------------------------------------------------------------===// // Main entry points. // Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); + ~Win64Exception() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index d2b4eec..276e7df 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -20,14 +20,13 @@ namespace llvm { StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { assert(S); - DIDescriptor D(S); - assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || - D.isLexicalBlockFile() || D.isLexicalBlock()) && + assert((isa<MDCompileUnit>(S) || isa<MDFile>(S) || isa<MDSubprogram>(S) || + isa<MDLexicalBlockBase>(S)) && "Unexpected scope info"); - DIScope Scope(S); - StringRef Dir = Scope.getDirectory(), - Filename = Scope.getFilename(); + auto *Scope = cast<MDScope>(S); + StringRef Dir = Scope->getDirectory(), + Filename = Scope->getFilename(); char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; if (Result) return Result; @@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { if (Filename.find(':') == 1) Filepath = Filename; else - Filepath = (Dir + Twine("\\") + Filename).str(); + Filepath = (Dir + "\\" + Filename).str(); // Canonicalize the path. We have to do it textually because we may no longer // have access to the file in the filesystem.
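// Example of the textual canonicalization referred to above (illustrative):
// a recorded path like "c:\src\foo\..\bar\.\x.cpp" must fold to
// "c:\src\bar\x.cpp" by pure string rewriting, since the file may no longer
// exist on the machine where the tables are emitted.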
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, const MachineFunction *MF) { - const MDNode *Scope = DL.getScope(MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); if (!Scope) return; StringRef Filename = getFullFilepath(Scope); @@ -193,7 +192,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { StringRef GVName = GV->getName(); StringRef FuncName; if (DISubprogram SP = getDISubprogram(GV)) - FuncName = SP.getDisplayName(); + FuncName = SP->getDisplayName(); // FIXME Clang currently sets DisplayName to "bar" for a C++ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying @@ -330,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { DebugLoc PrologEndLoc; bool EmptyPrologue = true; for (const auto &MBB : *MF) { - if (!PrologEndLoc.isUnknown()) + if (PrologEndLoc) break; for (const auto &MI : MBB) { if (MI.isDebugValue()) @@ -339,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. // FIXME: do we need the first subcondition? - if (!MI.getFlag(MachineInstr::FrameSetup) && - (!MI.getDebugLoc().isUnknown())) { + if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; } @@ -348,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { } } // Record beginning of function if we have a non-empty prologue. - if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + if (PrologEndLoc && !EmptyPrologue) { + DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } } @@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || DL.isUnknown()) + if (DL == PrevInstLoc || !DL) return; maybeRecordLocation(DL, Asm->MF); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 8492eac..c66d141 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() { + ~WinCodeViewLineTables() override { for (DirAndFilenameToFilepathMapTy::iterator I = DirAndFilenameToFilepathMap.begin(), E = DirAndFilenameToFilepathMap.end(); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6c9d048..35376e1 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -693,11 +693,11 @@ static bool SinkCast(CastInst *CI) { InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); - MadeChange = true; } // Replace a use of the cast with a use of the new cast. 
TheUse = InsertedCast; + MadeChange = true; ++NumCastUses; } @@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return SinkCast(CI); } -/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// CombineUAddWithOverflow - try to combine CI into a call to the +/// llvm.uadd.with.overflow intrinsic if possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getParent()->getParent()->getParent(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced. This is /// a clear win except on targets with multiple condition code registers /// (PowerPC), where it might lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool OptimizeCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCmp - Only insert a cmp in each block once. @@ -787,21 +834,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); - MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + MadeChange = true; ++NumCmpUses; } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) + if (CI->use_empty()) { CI->eraseFromParent(); + MadeChange = true; + } return MadeChange; } +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + /// isExtractBitsCandidateUse - Check if the candidates could /// be combined with shift instruction, which includes: /// 1.
Truncate instruction @@ -1081,8 +1140,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst* Load = Builder.CreateLoad(Gep, false); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); @@ -1176,7 +1236,8 @@ static void ScalarizeMaskedStore(CallInst *CI) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); Builder.CreateStore(OneElt, Gep); // Create "else" block, fill it in the next iteration @@ -1227,13 +1288,25 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { cast<PointerType>(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; AllocaInst *AI; - if ((Offset2 & (PrefAlign-1)) == 0 && - (AI = dyn_cast<AllocaInst>(Val)) && + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) AI->setAlignment(PrefAlign); - // TODO: Also align GlobalVariables + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. + GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -1841,7 +1914,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); } - ~InstructionRemover() { delete Replacer; } + ~InstructionRemover() override { delete Replacer; } /// \brief Really remove the instruction. void commit() override { delete Inst; } @@ -3233,7 +3306,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return false; } else { Type *I8PtrTy = - Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -3285,7 +3359,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. 
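// The CreateGEP / CreateInBoundsGEP calls touched throughout this file now
// take the pointee type explicitly (part of the move away from inferring
// element types from pointer operands). Minimal before/after sketch with
// hypothetical values:
//
//   Value *P = ...;                      // an i8* in this function
//   Builder.CreateGEP(P, Idx);           // old: element type read from P
//   Builder.CreateGEP(I8Ty, P, Idx);     // new: element type passed in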
if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -3296,7 +3370,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index ceef74d..af011a0 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -69,7 +69,7 @@ class TargetRegisterInfo; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); - ~CriticalAntiDepBreaker(); + ~CriticalAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 8f74271..6cde4c2 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -797,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (po_iterator<MachineDominatorTree*> - I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) - if (tryConvertIf(I->getBlock())) + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index a2c5fce..16cd9e8 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -99,10 +99,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - case GC::Loop: - return "loop"; - case GC::Return: - return "return"; case GC::PreCall: return "pre-call"; case GC::PostCall: diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 9d38e4c..ac35165 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -332,19 +332,22 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); - if (!FI->getStrategy().needsSafePoints()) - return false; - MMI = &getAnalysis<MachineModuleInfo>(); TII = MF.getSubtarget().getInstrInfo(); - // Find the size of the stack frame. - FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + // Find the size of the stack frame. There may be no correct static frame + // size, we use UINT64_MAX to represent this. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(MF); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); // Find all safe points. - FindSafePoints(MF); + if (FI->getStrategy().needsSafePoints()) + FindSafePoints(MF); - // Find the stack offsets for all roots. 
+ // Find the concrete stack offsets for all roots (stack slots) FindStackOffsets(MF); return false; diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 4188e5d..153ba1a 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -73,9 +73,10 @@ using namespace llvm; #define DEBUG_TYPE "global-merge" +// FIXME: This is only useful as a last-resort way to disable the pass. cl::opt<bool> EnableGlobalMerge("enable-global-merge", cl::Hidden, - cl::desc("Enable global merge pass"), + cl::desc("Enable the global merge pass"), cl::init(true)); static cl::opt<bool> @@ -222,7 +223,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Constant *GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index f0d407f..c7e7e58 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -135,7 +135,7 @@ private: // Dead defs generated during spilling. SmallVector<MachineInstr*, 8> DeadDefs; - ~InlineSpiller() {} + ~InlineSpiller() override {} public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) @@ -1232,6 +1232,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) .addFrameIndex(StackSlot) .addImm(Offset) diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 0fb0c46..61d68f6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -140,12 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -175,7 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, switch (FileType) { case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( - MAI.getAssemblerDialect(), MAI, MII, MRI, STI); + Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI); // Create a code emitter if asked to show the encoding. 
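// The EarlyIfConverter hunk earlier in this patch is a representative
// cleanup: explicit po_iterator begin/end pairs become the post_order()
// range adaptor. The same shape works for any type with a GraphTraits
// specialization; a sketch ('visit' is a hypothetical callback):
//
//   #include "llvm/ADT/PostOrderIterator.h"
//   for (auto *DomNode : post_order(DomTree))
//     visit(DomNode->getBlock());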
MCCodeEmitter *MCE = nullptr; @@ -184,8 +181,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); + auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, Out, Options.MCOptions.AsmVerbose, + *Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -229,9 +227,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// code is not supported. It fills the MCContext Ctx pointer which can be /// used to build custom MCStreamer. /// -bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, - MCContext *&Ctx, - raw_ostream &Out, +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 9eaf7da..d6998d6 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes( for (const auto &MBB : *MF) { const MachineInstr *RangeBeginMI = nullptr; const MachineInstr *PrevMI = nullptr; - DebugLoc PrevDL; + const MDLocation *PrevDL = nullptr; for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn.getDebugLoc(); - if (MIDL.isUnknown()) { + const MDLocation *MIDL = MInsn.getDebugLoc(); + if (!MIDL) { PrevMI = &MInsn; continue; } @@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes( } // Create last instruction range. - if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + if (RangeBeginMI && PrevMI && PrevDL) { InsnRange R(RangeBeginMI, PrevMI); MIRanges.push_back(R); MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); @@ -106,20 +106,17 @@ void LexicalScopes::extractLexicalScopes( /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. -LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); +LexicalScope *LexicalScopes::findLexicalScope(const MDLocation *DL) { + MDLocalScope *Scope = DL->getScope(); if (!Scope) return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); - if (IA) { + if (auto *IA = DL->getInlinedAt()) { auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; } @@ -128,46 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. 
-LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - if (DL.isUnknown()) - return nullptr; - MDNode *Scope = nullptr; - MDNode *InlinedAt = nullptr; - DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); - - if (InlinedAt) { +LexicalScope *LexicalScopes::getOrCreateLexicalScope(const MDLocalScope *Scope, + const MDLocation *IA) { + if (IA) { // Create an abstract scope for inlined function. getOrCreateAbstractScope(Scope); // Create an inlined scope for inlined function. - return getOrCreateInlinedScope(Scope, InlinedAt); + return getOrCreateInlinedScope(Scope, IA); } return getOrCreateRegularScope(Scope); } /// getOrCreateRegularScope - Find or create a regular lexical scope. -LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) { - Scope = DILexicalBlockFile(Scope).getScope(); - D = DIDescriptor(Scope); - } +LexicalScope * +LexicalScopes::getOrCreateRegularScope(const MDLocalScope *Scope) { + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = LexicalScopeMap.find(Scope); if (I != LexicalScopeMap.end()) return &I->second; + // FIXME: Should the following dyn_cast be MDLexicalBlock? LexicalScope *Parent = nullptr; - if (D.isLexicalBlock()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateLexicalScope(Block->getScope()); I = LexicalScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), - std::forward_as_tuple(Parent, DIDescriptor(Scope), - nullptr, false)).first; + std::forward_as_tuple(Parent, Scope, nullptr, + false)).first; if (!Parent) { - assert(DIDescriptor(Scope).isSubprogram()); - assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(cast<MDSubprogram>(Scope)->describes(MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } @@ -176,19 +166,19 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, - MDNode *InlinedAt) { - std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt); +LexicalScope * +LexicalScopes::getOrCreateInlinedScope(const MDLocalScope *Scope, + const MDLocation *InlinedAt) { + std::pair<const MDLocalScope *, const MDLocation *> P(Scope, InlinedAt); auto I = InlinedLexicalScopeMap.find(P); if (I != InlinedLexicalScopeMap.end()) return &I->second; LexicalScope *Parent; - DILexicalBlock Scope(ScopeNode); - if (Scope.isSubprogram()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt); else - Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + Parent = getOrCreateLexicalScope(InlinedAt); I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(P), @@ -199,27 +189,26 @@ LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. 
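// Worked example for the inlined-scope recursion above: a location in
// lexical block B of f(), where f() was inlined into g() at call site L,
// resolves as
//
//   getOrCreateInlinedScope(B, L)          // B is a block: recurse on its
//     -> getOrCreateInlinedScope(f, L)     //   parent with the same L
//       -> getOrCreateLexicalScope(L)      // f is a subprogram: past the
//                                          //   inlined body, L's own scope
//                                          //   chain in g() takes over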
-LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { - assert(N && "Invalid Scope encoding!"); +LexicalScope * +LexicalScopes::getOrCreateAbstractScope(const MDLocalScope *Scope) { + assert(Scope && "Invalid Scope encoding!"); - DIDescriptor Scope(N); - if (Scope.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = AbstractScopeMap.find(Scope); if (I != AbstractScopeMap.end()) return &I->second; + // FIXME: Should the following isa be MDLexicalBlock? LexicalScope *Parent = nullptr; - if (Scope.isLexicalBlock()) { - DILexicalBlock DB(Scope); - DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc); - } + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateAbstractScope(Block->getScope()); + I = AbstractScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), std::forward_as_tuple(Parent, Scope, nullptr, true)).first; - if (Scope.isSubprogram()) + if (isa<MDSubprogram>(Scope)) AbstractScopesList.push_back(&I->second); return &I->second; } @@ -280,7 +269,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + const MDLocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) @@ -303,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks( /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. -bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { +bool LexicalScopes::dominates(const MDLocation *DL, MachineBasicBlock *MBB) { LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return false; @@ -315,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { bool Result = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - DebugLoc IDL = I->getDebugLoc(); - if (IDL.isUnknown()) - continue; - if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) - if (Scope->dominates(IScope)) - return true; + if (const MDLocation *IDL = I->getDebugLoc()) + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; } return Result; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index e3791be..c2993db 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -158,10 +158,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, - bool indirect) const { - return Var == Variable && Expr == Expression && Offset == offset && - indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, const MDLocation *IA, + unsigned Offset, bool indirect) const { + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && + Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -269,12 +269,6 @@ public: void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); - /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. 
A - /// variable may have more than one corresponding DBG_VALUE instructions. - /// Only first one needs DebugLoc to identify variable's lexical scope - /// in source file. - DebugLoc findDebugLoc(); - /// getDebugLoc - Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} void print(raw_ostream &, const TargetRegisterInfo *); @@ -363,10 +357,47 @@ public: }; } // namespace +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL) + return; + + auto *Scope = cast<MDScope>(DL.getScope()); + // Omit the directory, because it's likely to be long and uninteresting. + CommentOS << Scope->getFilename(); + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + + DebugLoc InlinedAtDL = DL.getInlinedAt(); + if (!InlinedAtDL) + return; + + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; +} + +static void printExtendedName(raw_ostream &OS, const MDLocalVariable *V, + const MDLocation *DL) { + const LLVMContext &Ctx = V->getContext(); + StringRef Res = V->getName(); + if (!Res.empty()) + OS << Res << "," << V->getLine(); + if (auto *InlinedAt = DL->getInlinedAt()) { + if (DebugLoc InlinedAtDL = InlinedAt) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { - DIVariable DV(Variable); + DIVariable DV = cast<MDLocalVariable>(Variable); OS << "!\""; - DV.printExtendedName(OS); + printExtendedName(OS, DV, dl); + OS << "\"\t"; if (offset) OS << '+' << offset; @@ -433,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, Offset, IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect)) return UV; } @@ -942,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -DebugLoc UserValue::findDebugLoc() { - DebugLoc D = dl; - dl = DebugLoc(); - return D; -} void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, @@ -955,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; + assert(cast<MDLocalVariable>(Variable) + ->isValidLocationForIntrinsic(getDebugLoc()) && + "Expected inlined-at fields to agree"); if (Loc.isReg()) - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), IsIndirect, Loc.getReg(), offset, Variable, Expression); else - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addOperand(Loc) .addImm(offset) .addMetadata(Variable) diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 9748329..fe296bc 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -38,7 +38,7 @@ public: static char ID; // Pass identification, replacement for typeid LiveDebugVariables(); - ~LiveDebugVariables(); + ~LiveDebugVariables() override; /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. /// @param OldReg Old virtual register that is going away. 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 2afd7fa..d75e441 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -816,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR, static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) { SmallPtrSet<const MachineBasicBlock*, 5> Visited; - for (LiveRange::Segment &S : LI.segments) { - if (S.valno != nullptr) - continue; - // This can only happen at the begin of a basic block. - assert(S.start.isBlock() && "valno should only be missing at block begin"); - - Visited.clear(); - const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start); - for (const MachineBasicBlock *Pred : MBB->predecessors()) { - VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited); - if (VNI != nullptr) { - S.valno = VNI; - break; + + LiveRange::iterator OutIt; + VNInfo *PrevValNo = nullptr; + for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &S = *I; + // Determine final VNI if necessary. + if (S.valno == nullptr) { + // This can only happen at the begin of a basic block. + assert(S.start.isBlock() && "valno should only be missing at block begin"); + + Visited.clear(); + const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start); + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited); + if (VNI != nullptr) { + S.valno = VNI; + break; + } } + assert(S.valno != nullptr && "could not determine valno"); + } + // Merge with previous segment if it has the same VNI. + if (PrevValNo == S.valno && OutIt->end == S.start) { + OutIt->end = S.end; + } else { + // Didn't merge. Move OutIt to next segment. + if (PrevValNo == nullptr) + OutIt = LI.begin(); + else + ++OutIt; + + if (OutIt != I) + *OutIt = *I; + PrevValNo = S.valno; } - assert(S.valno != nullptr && "could not determine valno"); } + // If we merged some segments chop off the end. + ++OutIt; + LI.segments.erase(OutIt, LI.end()); } void LiveInterval::constructMainRangeFromSubranges( @@ -955,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges( NeedVNIFixup = true; } + // In rare cases we can produce adjacent segments with the same value + // number (if they come from different subranges, but happen to have + // the same defining instruction). VNIFixup will fix those cases. 
+ if (!empty() && segments.back().end == Pos && + segments.back().valno == VNI) + NeedVNIFixup = true; CurrentSegment.start = Pos; CurrentSegment.valno = VNI; ConstructingSegment = true; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 98359b1..34131bb 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const { if (getBasicBlock()) Name += getBasicBlock()->getName(); else - Name += (Twine("BB") + Twine(getNumber())).str(); + Name += ("BB" + Twine(getNumber())).str(); return Name; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index ecc50c9..2969bad 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -661,7 +661,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, for (MachineBasicBlock *MBB : L.getBlocks()) { BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be - // mid-way through an inner loop or a successor of an analyzable branch. + // mid-way through an inner loop or a successor of an unanalyzable branch. if (MBB != *std::prev(Chain.end())) continue; @@ -715,7 +715,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, // a frequency higher than the current exit before we consider breaking // the layout. BranchProbability Bias(100 - ExitBlockBias, 100); - if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { @@ -724,8 +724,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, } } - // Restore the old exiting state, no viable looping successor was found. if (!HasLoopingSucc) { + // Restore the old exiting state, no viable looping successor was found. ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 6ceace8..448531f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -380,7 +380,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getName().str() + "' function"; + return ("CFG for '" + F->getName() + "' function").str(); } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -468,7 +468,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } /// getPICBaseSymbol - Return a function-local symbol to represent the PIC diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 1240efb..d154110 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -881,8 +881,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, } // If DebugLoc does not match then two dbg.values are not identical. 
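// (DebugLoc now converts to bool, with null meaning "no location", so the
// explicit isUnknown() checks in this file reduce to plain truth tests.)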
if (isDebugValue()) - if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() - && getDebugLoc() != Other->getDebugLoc()) + if (getDebugLoc() && Other->getDebugLoc() && + getDebugLoc() != Other->getDebugLoc()) return false; return true; } @@ -1619,12 +1619,9 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. - const MDNode *MD = MO.getMetadata(); - DIDescriptor DI(MD); - DIVariable DIV(MD); - - if (DI.isVariable() && !DIV.getName().empty()) - OS << "!\"" << DIV.getName() << '\"'; + DIVariable DIV = dyn_cast<MDLocalVariable>(MO.getMetadata()); + if (DIV && !DIV->getName().empty()) + OS << "!\"" << DIV->getName() << '\"'; else MO.print(OS, TRI); } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { @@ -1711,13 +1708,13 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } // Print debug location information. - if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (isDebugValue() && getOperand(e - 2).isMetadata()) { if (!HaveSemi) OS << ";"; - DIVariable DV(getOperand(e - 1).getMetadata()); - OS << " line no:" << DV.getLineNumber(); - if (MDNode *InlinedAt = DV.getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown() && MF) { + DIVariable DV = cast<MDLocalVariable>(getOperand(e - 2).getMetadata()); + OS << " line no:" << DV->getLine(); + if (auto *InlinedAt = debugLoc->getInlinedAt()) { + DebugLoc InlinedAtDL(InlinedAt); + if (InlinedAtDL && MF) { OS << " inlined @[ "; InlinedAtDL.print(OS); OS << " ]"; @@ -1725,7 +1722,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } if (isIndirectDebugValue()) OS << " indirect"; - } else if (!debugLoc.isUnknown() && MF) { + } else if (debugLoc && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; debugLoc.print(OS); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 2f65a2e..9756f13 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -10,10 +10,6 @@ // This pass performs loop invariant code motion on machine instructions. We // attempt to remove as much code from the body of a loop as possible. // -// This pass does not attempt to throttle itself to limit register pressure. -// The register allocation phases are expected to perform rematerialization -// to recover when register pressure is high. -// // This pass is not intended to be a replacement or a complete alternative // for the LLVM-IR-level LICM pass. It is only designed to hoist simple // constructs that are not exposed before lowering and instruction selection. @@ -104,7 +100,7 @@ namespace { SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; - // Register pressure "limit" per register class. If the pressure + // Register pressure "limit" per register pressure set. If the pressure // is higher than the limit, then it's considered high. SmallVector<unsigned, 8> RegLimit; @@ -214,7 +210,8 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. 
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); + bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, + bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -254,21 +251,25 @@ namespace { /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// getRegisterClassIDAndCost - For a given MI, register, and the operand - /// index, return the ID and cost of its representative register class by - /// reference. - void getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const; - /// InitRegPressure - Find all virtual register references that are liveout /// of the preheader to initialize the starting "register pressure". Note /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); + /// calcRegisterCost - Calculate the additional register pressure that the + /// registers used in MI cause. + /// + /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to + /// figure out which usages are live-ins. + /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. + DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); + /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. - void UpdateRegPressure(const MachineInstr *MI); + void UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef = false); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -354,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. - unsigned NumRC = TRI->getNumRegClasses(); - RegPressure.resize(NumRC); + unsigned NumRPS = TRI->getNumRegPressureSets(); + RegPressure.resize(NumRPS); std::fill(RegPressure.begin(), RegPressure.end(), 0); - RegLimit.resize(NumRC); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); + RegLimit.resize(NumRPS); + for (unsigned i = 0, e = NumRPS; i != e; ++i) + RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); } // Get our Loop information... @@ -836,23 +836,6 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// getRegisterClassIDAndCost - For a given MI, register, and the operand -/// index, return the ID and cost of its representative register class. -void -MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - MVT VT = *RC->vt_begin(); - if (VT == MVT::Untyped) { - RCId = RC->getID(); - RCCost = 1; - } else { - RCId = TLI->getRepRegClassFor(VT)->getID(); - RCCost = TLI->getRepRegClassCostFor(VT); - } -} - /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. 
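MachineLICM's pressure tracking above moves from per-register-class weights to the target's register pressure sets. In outline, each virtual-register operand contributes a signed weight to every pressure set of its class; a minimal sketch of that accumulation, assuming Reg, MO, MRI, TRI and the Cost map as they appear in the hunks that follow:

// Sketch: fold one operand's weight into the per-pressure-set cost map.
// Defs add weight, kills subtract it; the real code additionally consults
// RegSeen and ConsiderUnseenAsDef to classify live-ins.
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
int RCCost = MO.isDef() ? TRI->getRegClassWeight(RC).RegWeight
                        : -TRI->getRegClassWeight(RC).RegWeight;
for (const int *PS = TRI->getRegClassPressureSets(RC); *PS != -1; ++PS)
  Cost[*PS] += RCCost;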
@@ -870,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { InitRegPressure(*BB->pred_begin()); } - for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); - MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - bool isNew = RegSeen.insert(Reg).second; - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) - RegPressure[RCId] += RCCost; - else { - bool isKill = isOperandKill(MO, MRI); - if (isNew && !isKill) - // Haven't seen this, it must be a livein. - RegPressure[RCId] += RCCost; - else if (!isNew && isKill) - RegPressure[RCId] -= RCCost; - } - } - } + for (const MachineInstr &MI : *BB) + UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; +void MachineLICM::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); + for (const auto &RPIdAndCost : Cost) { + unsigned Class = RPIdAndCost.first; + if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + RegPressure[Class] = 0; + else + RegPressure[Class] += RPIdAndCost.second; + } +} - SmallVector<unsigned, 4> Defs; +DenseMap<unsigned, int> +MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { + DenseMap<unsigned, int> Cost; + if (MI->isImplicitDef()) + return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -913,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg).second; + // FIXME: It seems bad to use RegSeen only for some of these calculations. + bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + + RegClassWeight W = TRI->getRegClassWeight(RC); + int RCCost = 0; if (MO.isDef()) - Defs.push_back(Reg); - else if (!isNew && isOperandKill(MO, MRI)) { - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (RCCost > RegPressure[RCId]) - RegPressure[RCId] = 0; + RCCost = W.RegWeight; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill && ConsiderUnseenAsDef) + // Haven't seen this, it must be a livein. 
+ RCCost = W.RegWeight; + else if (!isNew && isKill) + RCCost = -W.RegWeight; + } + if (RCCost == 0) + continue; + const int *PS = TRI->getRegClassPressureSets(RC); + for (; *PS != -1; ++PS) { + if (Cost.find(*PS) == Cost.end()) + Cost[*PS] = RCCost; else - RegPressure[RCId] -= RCCost; + Cost[*PS] += RCCost; } } - - unsigned Idx = 0; - while (!Defs.empty()) { - unsigned Reg = Defs.pop_back_val(); - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); - RegPressure[RCId] += RCCost; - ++Idx; - } + return Cost; } /// isLoadFromGOTOrConstantPool - Return true if this machine instruction @@ -1125,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, +bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - if (CI->second <= 0) + for (const auto &RPIdAndCost : Cost) { + if (RPIdAndCost.second <= 0) continue; - unsigned RCId = CI->first; - unsigned Limit = RegLimit[RCId]; - int Cost = CI->second; + unsigned Class = RPIdAndCost.first; + int Limit = RegLimit[Class]; // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. if (CheapInstr && !HoistCheapInsts) return true; - for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; - if (RP[RCId] + Cost >= Limit) + for (const auto &RP : BackTrace) + if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) return true; - } } return false; @@ -1155,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, /// current block and update their register pressures to reflect the effect /// of hoisting MI from the current block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; - // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); - } - } + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); // Update register pressure of blocks from loop header to current block. 
- for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i]; - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - unsigned RCId = CI->first; - RP[RCId] += CI->second; - } - } + for (auto &RP : BackTrace) + for (const auto &RPIdAndCost : Cost) + RP[RPIdAndCost.first] += RPIdAndCost.second; } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist @@ -1229,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (TII->isTriviallyReMaterializable(&MI, AA)) return true; - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // Cheap instructions will only be hoisted if they don't increase register - // pressure at all. // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -1245,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - DEBUG(dbgs() << "Hoist High Latency: " << MI); - ++NumHighLatency; - return true; - } - Cost[RCId] += RCCost; - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - Cost[RCId] -= RCCost; + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } } + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + // Visit BBs from header to current BB, if hoisting this doesn't cause // high register pressure, then it's safe to proceed. 
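// (High-latency defs are now hoisted before any pressure cost is computed;
// calcRegisterCost runs only for the remaining candidates.)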
if (!CanCauseHighRegPressure(Cost, CheapInstr)) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index fca7df0..e8bd1f8 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -425,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, Personalities.push_back(Personality); } +void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, + int State) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.WinEHState = State; +} + /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// void MachineModuleInfo:: @@ -563,10 +570,13 @@ const Function *MachineModuleInfo::getPersonality() const { } EHPersonality MachineModuleInfo::getPersonalityType() { - if (PersonalityTypeCache == EHPersonality::Unknown) - PersonalityTypeCache = classifyEHPersonality(getPersonality()); + if (PersonalityTypeCache == EHPersonality::Unknown) { + if (const Function *F = getPersonality()) + PersonalityTypeCache = classifyEHPersonality(F); + } return PersonalityTypeCache; } + /// getPersonalityIndex - Return unique index for current personality /// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { @@ -588,3 +598,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } + +const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { + StringRef WinEHParentName = + F->getFnAttribute("wineh-parent").getValueAsString(); + if (WinEHParentName.empty() || WinEHParentName == F->getName()) + return F; + return F->getParent()->getFunction(WinEHParentName); +} + +WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { + auto &Ptr = FuncInfoMap[getWinEHParent(F)]; + if (!Ptr) + Ptr.reset(new WinEHFuncInfo); + return *Ptr; +} diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index a1c7e9f..22d519e 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { return LHSS->getName().compare(RHSS->getName()); } -/// GetSortedStubs - Return the entries from a DenseMap in a deterministic -/// sorted orer. 
-MachineModuleInfoImpl::SymbolListTy -MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, - MachineModuleInfoImpl::StubValueTy>&Map) { +MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( + DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + + Map.clear(); return List; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 7a3c80b..a52d05f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1036,8 +1036,6 @@ void ScheduleDAGMILive::schedule() { scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1049,6 +1047,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 8aacd1f..5dc7c21 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -463,13 +463,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; Bounds.Visited.clear(); - typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; - for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : inverse_post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. - TBI.Pred = pickTracePred(*I); + TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; @@ -477,19 +475,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leading to I is now known, compute the depth resources. - computeDepthResources(*I); + computeDepthResources(I); } // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; - for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. - TBI.Succ = pickTraceSucc(*I); + TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; @@ -497,7 +493,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leaving I is now known, compute the height resources. 
- computeHeightResources(*I); + computeHeightResources(I); } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index ad59fc9..55f08e4 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -141,7 +141,7 @@ namespace { TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); - ~SchedulePostRATDList(); + ~SchedulePostRATDList() override; /// startBlock - Initialize register live-range state for scheduling in /// this block. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e073e6a..5334a63 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -752,6 +753,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; + MachineModuleInfo &MMI = Fn.getMMI(); + const Function *F = Fn.getFunction(); + const Function *ParentF = MMI.getWinEHParent(F); + unsigned FrameReg; + if (F == ParentF) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + // FIXME: This should be unconditional but we have bugs in the preparation + // pass. + if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) + FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( + Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); + } else if (MMI.hasWinEHFuncInfo(F)) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) + FuncInfo.CatchHandlerParentFrameObjOffset[F] = + TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); + } + // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c621414..c311c7b 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL; - if (MI == MBB->end()) { - // If MI is at basic block end then use last instruction's location. 
- MachineBasicBlock::iterator EI = MI; - DL = (--EI)->getDebugLoc(); - } else - DL = MI->getDebugLoc(); + DebugLoc DL = DBG->getDebugLoc(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(FI) @@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); + assert( + cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SS) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index e94f1bb..26f42c9 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9e3cf41..5d958a6 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -2731,15 +2731,19 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { assert(Copy.isCopyLike()); if (!UseTerminalRule) return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); // Check if the destination of this copy has any other affinity. - unsigned DstReg = Copy.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it inteferes with any other // copy involving SrcReg. 
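// (Resolving the operands through isMoveInstr above also covers subregister
// copies, and copies whose source is a physical register are deliberately
// exempted from the terminal rule so rematerialization is not lost.)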
- unsigned SrcReg = Copy.getOperand(1).getReg(); const MachineBasicBlock *OrigBB = Copy.getParent(); const LiveInterval &DstLI = LIS->getInterval(DstReg); for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { @@ -2751,9 +2755,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { // For now, just consider the copies that are in the same block. if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) continue; - unsigned OtherReg = MI.getOperand(0).getReg(); + unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg); if (OtherReg == SrcReg) - OtherReg = MI.getOperand(1).getReg(); + OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || isTerminalReg(OtherReg, MI, MRI)) @@ -2775,25 +2781,45 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // yet, it might invalidate the iterator. const unsigned PrevSize = WorkList.size(); if (JoinGlobalCopies) { + SmallVector<MachineInstr*, 2> LocalTerminals; + SmallVector<MachineInstr*, 2> GlobalTerminals; // Coalesce copies bottom-up to coalesce local defs before local uses. They // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - if (!MII->isCopyLike() || applyTerminalRule(*MII)) + if (!MII->isCopyLike()) continue; - if (isLocalCopy(&(*MII), LIS)) - LocalWorkList.push_back(&(*MII)); - else - WorkList.push_back(&(*MII)); + bool ApplyTerminalRule = applyTerminalRule(*MII); + if (isLocalCopy(&(*MII), LIS)) { + if (ApplyTerminalRule) + LocalTerminals.push_back(&(*MII)); + else + LocalWorkList.push_back(&(*MII)); + } else { + if (ApplyTerminalRule) + GlobalTerminals.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } } + // Append the copies evicted by the terminal rule at the end of the list. + LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); + WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); } else { + SmallVector<MachineInstr*, 2> Terminals; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike() && !applyTerminalRule(*MII)) - WorkList.push_back(MII); + if (MII->isCopyLike()) { + if (applyTerminalRule(*MII)) + Terminals.push_back(&(*MII)); + else + WorkList.push_back(MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. // This prevents the WorkList from getting too large since most copies are diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a1c84c5..22fd6d6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -705,16 +705,8 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. 
-static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { - if (isa<ConstantSDNode>(N)) - return N.getNode(); - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (BV && BV->isConstant()) - return BV; - return nullptr; -} - +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); @@ -723,6 +715,8 @@ static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { return nullptr; } +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { if (isa<ConstantFPSDNode>(N)) return N.getNode(); @@ -773,8 +767,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -793,8 +787,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); @@ -1583,8 +1577,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1604,7 +1598,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) @@ -1624,8 +1619,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && @@ -1828,8 +1822,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1984,26 +1978,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - 
SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); } // fold (mul c1, c2) -> c1*c2 if (N0IsConst && N1IsConst) return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2083,8 +2078,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2096,10 +2090,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); @@ -2163,7 +2156,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2186,10 +2179,9 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); @@ -2459,8 +2451,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2489,8 +2481,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. 
if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2809,8 +2801,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2839,7 +2831,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) @@ -2850,8 +2843,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -3470,8 +3462,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3556,7 +3548,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) @@ -3580,8 +3573,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. 
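The same right-hand-side canonicalization recurs through visitADD, visitMUL, visitAND, visitOR and visitXOR in this file: the scalar-only N0C test widens so that constant BUILD_VECTOR operands are moved to the RHS as well. The shared shape, copied from the surrounding hunks (Opc standing for the visited node's integer opcode):

// Canonicalize a constant operand to the RHS; vector constants need not be
// splats for this to fire.
if (isConstantIntBuildVectorOrConstantInt(N0) &&
    !isConstantIntBuildVectorOrConstantInt(N1))
  return DAG.getNode(Opc, SDLoc(N), VT, N1, N0);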
@@ -3875,8 +3867,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3899,14 +3891,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -4152,8 +4144,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); // If setcc produces all-one true value then: @@ -4332,8 +4324,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } @@ -4478,8 +4470,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } @@ -4889,7 +4881,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); - if (N1_2 == N2) { + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), @@ -4908,7 +4900,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); - if (N2_1 == N1) { + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), @@ -7037,7 +7029,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. 
- bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -7055,10 +7046,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -7206,10 +7193,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) @@ -7400,10 +7386,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) @@ -7552,15 +7537,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. 
- if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 @@ -7568,7 +7546,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 1.0) -> A @@ -7734,10 +7713,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -8216,11 +8194,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -8228,11 +8205,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -8240,11 +8216,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -10080,19 +10055,19 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned EarliestNodeUsed = 0; + unsigned LatestNodeUsed = 0; for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. 
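In this renaming, the "latest" store is the one with the smallest SequenceNum, and the scan above is a plain minimum-by-key search. The same loop with placeholder types (MemOp and pickLatestUsed are illustrative, not LLVM names):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MemOp { unsigned SequenceNum; };

    size_t pickLatestUsed(const std::vector<MemOp> &StoreNodes, size_t NumElem) {
      size_t LatestNodeUsed = 0;
      for (size_t i = 1; i < NumElem; ++i)
        if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
          LatestNodeUsed = i; // smaller SequenceNum = later in the DAG
      return LatestNodeUsed;
    }

    int main() {
      std::vector<MemOp> Stores = {{5}, {2}, {7}};
      assert(pickLatestUsed(Stores, Stores.size()) == 1); // SequenceNum 2 wins
    }

StoreNodes[LatestNodeUsed] then supplies the chain for the merged wide store, and CombineTo replaces that node rather than the first one.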
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; @@ -10151,17 +10126,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, StoreTy); } - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); // ReplaceAllUsesWith will replace all uses that existed when it was @@ -10538,18 +10513,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; @@ -10571,7 +10547,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { false, false, false, FirstLoad->getAlignment()); - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); @@ -10589,12 +10565,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); } - // Replace the first store with the new store. - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { // Remove all Store nodes. - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); @@ -11523,6 +11499,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. 
+ if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11538,9 +11576,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. + if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11548,6 +11587,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11615,6 +11663,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 
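combineConcatVectorOfScalars only fires when every operand is a bitcast integer or floating-point scalar (or undef); anything else, such as x86mmx, aborts the combine. A compact model of that AnyInteger/AnyFP bookkeeping, with invented names throughout:

    #include <cassert>
    #include <vector>

    enum class ScalarKind { Int, FP, Other };

    // Returns true if every operand is Int or FP, recording what was seen,
    // mirroring the AnyInteger/AnyFP flags in the hunk above.
    bool classifyOperands(const std::vector<ScalarKind> &Ops,
                          bool &AnyInteger, bool &AnyFP) {
      AnyInteger = AnyFP = false;
      for (ScalarKind K : Ops) {
        if (K == ScalarKind::FP)
          AnyFP = true;
        else if (K == ScalarKind::Int)
          AnyInteger = true;
        else
          return false; // something weird like x86mmx: give up on the combine
      }
      return true;
    }

    int main() {
      bool AnyInt, AnyFP;
      std::vector<ScalarKind> Mixed = {ScalarKind::Int, ScalarKind::FP};
      assert(classifyOperands(Mixed, AnyInt, AnyFP) && AnyInt && AnyFP);
      std::vector<ScalarKind> Bad = {ScalarKind::Other};
      assert(!classifyOperands(Bad, AnyInt, AnyFP));
    }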
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11676,7 +11728,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -12001,6 +12053,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 223a149..5ffb826 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -425,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t Imm = CI->getZExtValue(); + uint64_t Imm = CI->getSExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && @@ -1079,11 +1079,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. 
case Intrinsic::donothing: return true; + case Intrinsic::eh_actions: { + unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DI->getVariable(); if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -1124,6 +1129,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1148,6 +1155,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 291b583..4b8ae32 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -79,12 +80,35 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int NextState; + + SmallVector<ActionHandler *, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? -1 : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(ArrayRef<ActionHandler *> Actions, ImmutableCallSite CS); + void calculateStateNumbers(const Function &F); +}; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -178,13 +202,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // during the initial isel pass through the IR so that it is done // in a predictable order. 
if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { - MachineModuleInfo &MMI = MF->getMMI(); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (MMI.hasDebugInfo() && - DIVar && - !DI->getDebugLoc().isUnknown()) { + DIVariable DIVar = DI->getVariable(); + if (MMI.hasDebugInfo() && DIVar && DI->getDebugLoc()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack // temporary alloca at this point). @@ -252,8 +271,179 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. for (BB = Fn->begin(); BB != EB; ++BB) - if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + // Calculate EH numbers for WinEH. + if (fn.hasFnAttribute("wineh-parent")) { + const Function *WinEHParentFn = MMI.getWinEHParent(&fn); + WinEHFuncInfo &FI = MMI.getWinEHFuncInfo(WinEHParentFn); + if (FI.LandingPadStateMap.empty()) { + WinEHNumbering Num(FI); + Num.calculateStateNumbers(*WinEHParentFn); + // Pop everything on the handler stack. + Num.processCallSite(None, ImmutableCallSite()); + } + } +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite(ArrayRef<ActionHandler *> Actions, + ImmutableCallSite CS) { + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + delete Actions[FirstMismatch]; + } + + bool EnteringScope = (int)Actions.size() > FirstMismatch; + + // Don't recurse while we are looping over the handler stack. 
Instead, defer + // the numbering of the catch handlers until we are done popping. + SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + if (auto *CH = dyn_cast<CatchHandler>(HandlerStack.back())) { + PoppedCatches.push_back(CH); + } else { + // Delete cleanup handlers + delete HandlerStack.back(); + } + HandlerStack.pop_back(); + } + + // We need to create a new state number if we are exiting a try scope and we + // will not push any more actions. + int TryHigh = NextState - 1; + if (!EnteringScope && !PoppedCatches.empty()) { + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + } + + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) + calculateStateNumbers(*F); + delete CH; + } + + bool LastActionWasCatch = false; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I]); + // FIXME: Reenable this optimization! + if (CurrActionIsCatch && LastActionWasCatch && false) { + Actions[I]->setEHState(currentEHNumber()); + } else { + createUnwindMapEntry(currentEHNumber(), Actions[I]); + Actions[I]->setEHState(NextState); + NextState++; + DEBUG(dbgs() << "Creating unwind map entry for: ("); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ", " << currentEHNumber() << ")\n"); + } + HandlerStack.push_back(Actions[I]); + LastActionWasCatch = CurrActionIsCatch; + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't renumber it. + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<ActionHandler *, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + } + + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; } /// clear - Clear out all the function-specific state. 
This returns this @@ -462,8 +652,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { + for (auto i : post_order(T)) { if (i->isFloatingPointTy()) { MMI->setUsesVAFloatArgument(true); return; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 93699a7..64d606a 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ece38f3..4a28a4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4033,6 +4033,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; @@ -4188,11 +4190,28 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); @@ -4201,10 +4220,40 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp3, DAG.getIntPtrConstant(0))); break; } - case ISD::FLOG2: - case ISD::FEXP2: + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0))); + break; + } + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: case ISD::FLOG: - case ISD::FEXP: { + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cef3fc9..9de85d7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -593,6 +593,7 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -600,7 +601,6 @@ private: SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 03c2734..408d5ed 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -379,8 +379,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // There are currently two cases of vector promotion: // 1) Bitcasting a vector of integers to a different type to a vector of the - // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. - // 2) Extending a vector of floats to a vector of the same number oflarger + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. 
+ // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f7e4557..f000902 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1293,7 +1293,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1304,20 +1306,32 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = SplitVecOp_UnaryOp(N); break; + case ISD::FTRUNC: + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + break; } } @@ -1581,7 +1595,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N, + unsigned TruncateOp) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1624,8 +1639,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. 
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return DAG.getNode(TruncateOp, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8b54e656..fd0fa31 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -173,7 +173,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGRRList() { + ~ScheduleDAGRRList() override { delete HazardRec; delete AvailableQueue; } @@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // If one or more successors has been unscheduled, then the current // node is no longer available. - if (!TrySU->isAvailable) + if (!TrySU->isAvailable || !TrySU->NodeQueueId) CurSU = AvailableQueue->pop(); else { + // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2cd1f4b..6351fa2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -44,7 +44,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - virtual ~ScheduleDAGSDNodes() {} + ~ScheduleDAGSDNodes() override {} /// Run - perform scheduling. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 418b58e..eee4a4b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -76,7 +76,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGVLIW() { + ~ScheduleDAGVLIW() override { delete HazardRec; delete AvailableQueue; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b52f648..770f0b2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2851,10 +2851,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::TRUNCATE: + // Constant build vector truncation can be done with the original scalar + // operands but with a new build vector with the truncated value type. + return getNode(ISD::BUILD_VECTOR, DL, VT, BV->ops()); case ISD::FNEG: case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: case ISD::FP_EXTEND: - case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { // Let the above scalar folding handle the folding of each element. 
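The SplitVecOp_TruncateHelper change above generalizes a two-hop narrowing: truncate each half of the wide input to an intermediate element width, concatenate the halves, then truncate the joined vector to the final type (possibly chaining again on targets with awkward legal-type sets). A scalar-array model of that shape; the widths and helper names are illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint16_t> truncTo16(const std::vector<uint32_t> &In) {
      std::vector<uint16_t> Out;
      for (uint32_t V : In) Out.push_back(static_cast<uint16_t>(V));
      return Out;
    }

    std::vector<uint8_t> truncTo8(const std::vector<uint16_t> &In) {
      std::vector<uint8_t> Out;
      for (uint16_t V : In) Out.push_back(static_cast<uint8_t>(V));
      return Out;
    }

    int main() {
      std::vector<uint32_t> Lo = {0x00000102, 0x00000304};
      std::vector<uint32_t> Hi = {0x00000506, 0x00000708};
      // Truncate the halves to the intermediate element width, ...
      std::vector<uint16_t> Inter = truncTo16(Lo);
      std::vector<uint16_t> HiInter = truncTo16(Hi);
      // ... concatenate them, ...
      Inter.insert(Inter.end(), HiInter.begin(), HiInter.end());
      // ... then truncate the joined vector to the final element type.
      std::vector<uint8_t> Out = truncTo8(Inter);
      assert(Out.size() == 4 && Out[0] == 0x02 && Out[3] == 0x08);
    }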
@@ -2870,6 +2876,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } if (Ops.size() == VT.getVectorNumElements()) return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + break; } } } @@ -3628,7 +3635,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } @@ -3791,12 +3797,27 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); } return Value; @@ -4276,7 +4297,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4334,15 +4355,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4389,15 +4411,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 
0"); @@ -4446,7 +4469,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -5574,8 +5598,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && - (OLoc.getDebugLoc() != NLoc)) { + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5885,6 +5908,8 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } @@ -5892,6 +5917,8 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); } @@ -5899,6 +5926,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6c14e79..32d2aae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -997,14 +998,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDNode *Variable = DI->getVariable(); - MDNode *Expr = DI->getExpression(); + MDLocalVariable *Variable = DI->getVariable(); + MDExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. 
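The getMemsetValue hunk above widens the i8 fill byte by multiplying with APInt::getSplat's 0x0101... pattern. The same trick on a plain 32-bit integer:

    #include <cassert>
    #include <cstdint>

    uint32_t splatByte32(uint8_t Fill) {
      // Multiplying by 0x01010101 copies the byte into every byte lane,
      // with no carries between lanes.
      return static_cast<uint32_t>(Fill) * 0x01010101u;
    }

    int main() {
      assert(splatByte32(0xAB) == 0xABABABABu);
      assert(splatByte32(0x00) == 0u);
    }

In the patch the multiply only runs when NumBits > 8, and the vector case is then built by broadcasting the widened scalar through a BUILD_VECTOR.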
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), IsIndirect, Offset, dl, DbgSDNodeOrder); @@ -4447,11 +4450,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, - MDNode *Variable, - MDNode *Expr, int64_t Offset, - bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, MDLocalVariable *Variable, MDExpression *Expr, + MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4460,8 +4461,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? DIVariable DV(Variable); - if (DV.isInlinedFnArgument(MF.getFunction())) + if (!DV->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional<MachineOperand> Op; @@ -4502,13 +4505,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, if (!Op) return false; + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, Variable, Expr)); + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) @@ -4589,9 +4594,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { @@ -4608,8 +4616,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { @@ -4628,19 +4638,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + MDLocalVariable *Variable = DI.getVariable(); + MDExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar(Variable); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = Variable; if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; @@ -4663,7 +4673,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Address = BCI->getOperand(0); // Parameters are handled specially. bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + (DIVariable(Variable)->getTag() == dwarf::DW_TAG_arg_variable || isa<Argument>(Address)); const AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4677,7 +4687,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); return nullptr; } } else if (AI) @@ -4694,7 +4705,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
@@ -4717,14 +4728,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - DIVariable DIVar(DI.getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgValueInst should be either null or a DIVariable."); + DIVariable DIVar = DI.getVariable(); if (!DIVar) return nullptr; - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + MDLocalVariable *Variable = DI.getVariable(); + MDExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4745,7 +4754,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), IsIndirect, Offset, dl, SDNodeOrder); @@ -5360,6 +5369,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); + case Intrinsic::eh_actions: + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -5397,14 +5409,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { - AllocaInst *Slot = - cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts()); + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(), - Idx); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::FRAME_ALLOC)) .addSym(FrameAllocSym) @@ -5424,8 +5438,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(), - IdxVal); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. 
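In the frameescape lowering above, a null pointer argument is skipped but still consumes its index — it marks a hole in the index space, not the end of the list, so the per-slot symbols stay aligned with their argument positions. The shape of that loop with placeholder types (countEscapedSlots is invented for illustration):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int countEscapedSlots(const std::vector<int *> &Args) {
      int Emitted = 0;
      for (size_t Idx = 0; Idx < Args.size(); ++Idx) {
        if (Args[Idx] == nullptr)
          continue; // hole: skip it, but Idx still advances with the list
        ++Emitted;  // a real slot would get a label derived from Idx here
      }
      return Emitted;
    }

    int main() {
      int A = 0, B = 0;
      std::vector<int *> Args = {&A, nullptr, &B};
      assert(countEscapedSlots(Args) == 2);
    }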
@@ -5446,16 +5460,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); - case Intrinsic::eh_unwindhelp: { - AllocaInst *Slot = - cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); - assert(FuncInfo.StaticAllocaMap.count(Slot) && - "can only use static allocas with llvm.eh.unwindhelp"); - int FI = FuncInfo.StaticAllocaMap[Slot]; - // TODO: Save this in the not-yet-existant WinEHFuncInfo struct. - (void)FI; - return nullptr; - } } } @@ -5546,6 +5550,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. + if (Entry.isSRet && isa<Instruction>(V)) + isTailCall = false; } // Check if target-independent constraints permit a tail call here. @@ -7183,6 +7192,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the callers stack frame. + CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; @@ -7790,3 +7803,17 @@ MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { return nullptr; return I; } + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. + if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 30240d8..a27f470 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -397,7 +397,6 @@ private: StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr), Guard(nullptr), GuardReg(0) { } - ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. @@ -823,12 +822,17 @@ private: /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. - bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, + bool EmitFuncArgumentDbgValue(const Value *V, MDLocalVariable *Variable, + MDExpression *Expr, MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. 
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); + + /// Update the DAG and DAG builder with the relevant information after + /// a new root node has been created which could be a tail call. + void updateDAGForMaybeTailCall(SDValue MaybeTC); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5898da4..636c0a7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -520,22 +520,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, since it's usually long and uninteresting. - if (Scope) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } + if (!G) + return; + + MDLocation *L = getDebugLoc(); + if (!L) + return; + + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4d2af3f..1e116dd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -500,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + assert(cast<MDLocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, - Variable, Expr); + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -523,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = nullptr; break; } if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. 
MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); @@ -670,8 +674,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getName().str() + ":" + - FuncInfo->MBB->getBasicBlock()->getName().str(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -929,53 +933,74 @@ void SelectionDAGISel::PrepareEHLandingPad() { const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); MF->getMMI().addPersonality( MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); - if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) { - // Make virtual registers and a series of labels that fill in values for the - // clauses. - auto &RI = MF->getRegInfo(); - FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + EHPersonality Personality = MF->getMMI().getPersonalityType(); - // Get all invoke BBs that will unwind into the clause BBs. + if (isMSVCEHPersonality(Personality)) { + SmallVector<MachineBasicBlock *, 4> ClauseBBs; + const IntrinsicInst *Actions = + dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); + // Get all invoke BBs that unwind to this landingpad. SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), MBB->pred_end()); + if (Actions && Actions->getIntrinsicID() == Intrinsic::eh_actions) { + // If this is a call to llvm.eh.actions followed by indirectbr, then we've + // run WinEHPrepare, and we should remove this block from the machine CFG. + // Mark the targets of the indirectbr as landingpads instead. + for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { + MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + } + } else { + // Otherwise, we haven't done the preparation, and we need to invent some + // clause basic blocks that branch into the landingpad. + // FIXME: Remove this code once SEH preparation works. + + // Make virtual registers and a series of labels that fill in values for + // the clauses. + auto &RI = MF->getRegInfo(); + FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + + // Emit separate machine basic blocks with separate labels for each clause + // before the main landing pad block. + MachineInstrBuilder SelectorPHI = BuildMI( + *MBB, MBB->begin(), SDB->getCurDebugLoc(), + TII->get(TargetOpcode::PHI), FuncInfo->ExceptionSelectorVirtReg); + for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { + // Skip filter clauses, we can't implement them. + if (LPadInst->isFilter(I)) + continue; - // Emit separate machine basic blocks with separate labels for each clause - // before the main landing pad block. - MachineInstrBuilder SelectorPHI = BuildMI( - *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), - FuncInfo->ExceptionSelectorVirtReg); - for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { - // Skip filter clauses, we can't implement them yet. 
- if (LPadInst->isFilter(I)) - continue; - - MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); - MF->insert(MBB, ClauseBB); + MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); + MF->insert(MBB, ClauseBB); - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); - GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); + GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); - // Start the BB with a label. - MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); - BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) - .addSym(ClauseLabel); + // Start the BB with a label. + MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); + BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) + .addSym(ClauseLabel); - // Construct a simple BB that defines a register with the typeid constant. - FuncInfo->MBB = ClauseBB; - FuncInfo->InsertPt = ClauseBB->end(); - unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); - CurDAG->setRoot(SDB->getRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); + // Construct a simple BB that defines a register with the typeid + // constant. + FuncInfo->MBB = ClauseBB; + FuncInfo->InsertPt = ClauseBB->end(); + unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); - // Add the typeid virtual register to the phi in the main landing pad. - SelectorPHI.addReg(VReg).addMBB(ClauseBB); + // Add the typeid virtual register to the phi in the main landing pad. + SelectorPHI.addReg(VReg).addMBB(ClauseBB); + } } // Remove the edge from the invoke to the lpad. @@ -986,6 +1011,12 @@ void SelectionDAGISel::PrepareEHLandingPad() { // pad block. FuncInfo->MBB = MBB; FuncInfo->InsertPt = MBB->end(); + + // Transfer EH state number assigned to the IR block to the MBB. + if (Personality == EHPersonality::MSVC_CXX) { + WinEHFuncInfo &FI = MF->getMMI().getWinEHFuncInfo(MF->getFunction()); + MF->getMMI().addWinEHState(MBB, FI.LandingPadStateMap[LPadInst]); + } return; } @@ -1165,7 +1196,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (FuncInfo->MBB->isLandingPad()) + if (LLVMBB->isLandingPad()) PrepareEHLandingPad(); // Before doing SelectionDAG ISel, see if FastISel has been requested. @@ -2552,7 +2583,7 @@ public: SelectionDAG::DAGUpdateListener(DAG), RecordedNodes(RN), MatchScopes(MS) { } - void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). 
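The updateDAGForMaybeTailCall helper added to SelectionDAGBuilder above pairs with the mem-intrinsic lowering entry points, which hand back an SDValue whose node is null when the operation was emitted as a tail call. A minimal sketch of the intended calling pattern, assuming the usual visitIntrinsicCall locals (sdl, Op1..Op3, Align, isVol are stand-in names, not part of this patch) and a getMemcpy overload carrying an isTailCall flag as in this revision:

    // Sketch only: Op1/Op2/Op3 are the lowered dst/src/size operands.
    SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                               /*AlwaysInline=*/false, /*isTailCall=*/true,
                               MachinePointerInfo(I.getArgOperand(0)),
                               MachinePointerInfo(I.getArgOperand(1)));
    // A null node means the copy became a tail call: there is no new root to
    // install, and no additional return may be emitted for this block.
    updateDAGForMaybeTailCall(MC);
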
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3cc7a98..a9ffa72 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -397,10 +397,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -441,27 +442,25 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // heap. This is basically just here to help catch errors during statepoint // insertion. TODO: This should actually be in the Verifier, but we can't get // to the GCStrategy from there (yet). - if (Builder.GFI) { - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); - } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); - } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); } } #endif @@ -523,6 +522,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. 
In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca + for (Value *V : StatepointSite.gc_args()) { + SDValue Incoming = Builder.getValue(V); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + + } + } } void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { @@ -565,10 +579,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // TODO: This if should become an assert. For now, we allow the GCStrategy // to be optional for backwards compatibility. This will only last a short // period (i.e. a couple of weeks). - if (GFI) { - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); - } + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); #endif // Lower statepoint vmstate and gcstate arguments @@ -614,7 +626,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // Add a leading constant argument with the Flags and the calling convention // masked together CallingConv::ID CallConv = CS.getCallingConv(); - int Flags = dyn_cast<ConstantInt>(CS.getArgument(2))->getZExtValue(); + int Flags = cast<ConstantInt>(CS.getArgument(2))->getZExtValue(); assert(Flags == 0 && "not expected to be used"); Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); Ops.push_back( diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 66a6a3c..7c0b2bb 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -239,7 +239,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { Constant *GEPIndices[2] = { ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; - return ConstantExpr::getGetElementPtr(GV, GEPIndices); + return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); } Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { @@ -249,7 +249,7 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - return StructType::create(EltTys, "gc_stackentry." + F.getName().str()); + return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str()); } /// doInitialization - If this module uses the GC intrinsics, find them now. 
If diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 2335a88..0635173 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { const TargetMachine *TM; + Type *doubleUnderDataTy; + Type *doubleUnderJBufTy; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf + doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + doubleUnderDataTy, // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + doubleUnderJBufTy, // __jbuf nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), @@ -204,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); + Value *FCData = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = - Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = - Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -223,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( + FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); Builder.CreateStore( Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); - Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Value *LSDAFieldPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep"); Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; @@ -400,16 +406,19 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. 
- Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); + Value *JBufPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); + Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0, + "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. - Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); + Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2, + "jbuf_sp_gep"); Val = Builder.CreateCall(StackAddrFn, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 622361e..03dd58d 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -70,7 +70,7 @@ public: static char ID; // Pass identification, replacement for typeid. SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} - ~SpillPlacement() { releaseMemory(); } + ~SpillPlacement() override { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and /// exit. diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 7572803..2bf2d64 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -464,7 +464,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { continue; if (SlotRemap.count(VI.Slot)) { DEBUG(dbgs() << "Remapping debug info for [" - << DIVariable(VI.Var).getName() << "].\n"); + << cast<MDLocalVariable>(VI.Var)->getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 58a6d52..2162a51 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1246,20 +1246,13 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + // Decide how to handle f32. If the target does not have native f32 support, + // expand it to i32 and we will be generating soft float library calls. 
if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } if (!isTypeLegal(MVT::f16)) { diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index bcf2aa7..5b795e4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -245,9 +245,11 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro"; } -static const MCSectionELF *selectELFSectionForGlobal( - MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) { +static const MCSectionELF * +selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { unsigned EntrySize = 0; if (Kind.isMergeableCString()) { if (Kind.isMergeable2ByteCString()) { @@ -297,9 +299,13 @@ static const MCSectionELF *selectELFSectionForGlobal( Name.push_back('.'); TM.getNameWithPrefix(Name, GV, Mang, true); } + unsigned UniqueID = ~0; + if (EmitUniqueSection && !UniqueSectionNames) { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; + } return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, - EntrySize, Group, - EmitUniqueSection && !UniqueSectionNames); + EntrySize, Group, UniqueID); } const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( @@ -319,7 +325,7 @@ const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( EmitUniqueSection |= GV->hasComdat(); return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, - EmitUniqueSection, Flags); + EmitUniqueSection, Flags, &NextUniqueID); } const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( @@ -332,7 +338,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( return ReadOnlySection; return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), - Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC); + Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + &NextUniqueID); } bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index ab0f96e..35b944e 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -33,6 +35,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" 
#include <memory> using namespace llvm; @@ -49,14 +52,15 @@ namespace { // frame allocation structure. typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap; -typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; +// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't +// quite null. +AllocaInst *getCatchObjectSentinel() { + return static_cast<AllocaInst *>(nullptr) + 1; +} -enum ActionType { Catch, Cleanup }; +typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; class LandingPadActions; -class ActionHandler; -class CatchHandler; -class CleanupHandler; class LandingPadMap; typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy; @@ -66,7 +70,7 @@ class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID) {} + : FunctionPass(ID), DT(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -81,31 +85,62 @@ public: private: bool prepareExceptionHandlers(Function &F, SmallVectorImpl<LandingPadInst *> &LPads); + void promoteLandingPadValues(LandingPadInst *LPad); + void completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &VarInfo); bool outlineHandler(ActionHandler *Action, Function *SrcFn, LandingPadInst *LPad, BasicBlock *StartBB, FrameVarInfoMap &VarInfo); + void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn); void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, VisitedBlockSet &VisitedBlocks); - CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB); + void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, + BasicBlock *EndBB); void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); // All fields are reset by runOnFunction. + DominatorTree *DT; EHPersonality Personality; CatchHandlerMapTy CatchHandlerMap; CleanupHandlerMapTy CleanupHandlerMap; - DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + + // This maps landing pad instructions found in outlined handlers to + // the landing pad instruction in the parent function from which they + // were cloned. The cloned/nested landing pad is used as the key + // because the landing pad may be cloned into multiple handlers. + // This map will be used to add the llvm.eh.actions call to the nested + // landing pads after all handlers have been outlined. + DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP; + + // This maps blocks in the parent function which are destinations of + // catch handlers to cloned blocks in (other) outlined handlers. This + // handles the case where a nested landing pads has a catch handler that + // returns to a handler function rather than the parent function. + // The original block is used as the key here because there should only + // ever be one handler function from which the cloned block is not pruned. + // The original block will be pruned from the parent function after all + // handlers have been outlined. This map will be used to adjust the + // return instructions of handlers which return to the block that was + // outlined into a handler. This is done after all handlers have been + // outlined but before the outlined code is pruned from the parent function. 
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; }; class WinEHFrameVariableMaterializer : public ValueMaterializer { public: WinEHFrameVariableMaterializer(Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo); - ~WinEHFrameVariableMaterializer() {} + ~WinEHFrameVariableMaterializer() override {} + + Value *materializeValueFor(Value *V) override; - virtual Value *materializeValueFor(Value *V) override; + void escapeCatchObject(Value *V); private: FrameVarInfoMap &FrameVarInfo; @@ -119,42 +154,23 @@ public: bool isInitialized() { return OriginLPad != nullptr; } - bool mapIfEHPtrLoad(const LoadInst *Load) { - return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs); - } - bool mapIfSelectorLoad(const LoadInst *Load) { - return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs); - } - + bool isOriginLandingPadBlock(const BasicBlock *BB) const; bool isLandingPadSpecificInst(const Instruction *Inst) const; - void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const; + void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const; private: - bool mapIfEHLoad(const LoadInst *Load, - SmallVectorImpl<const StoreInst *> &Stores, - SmallVectorImpl<const Value *> &StoreAddrs); - const LandingPadInst *OriginLPad; // We will normally only see one of each of these instructions, but // if more than one occurs for some reason we can handle that. TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; - - // In optimized code, there will typically be at most one instance of - // each of the following, but in unoptimized IR it is not uncommon - // for the values to be stored, loaded and then stored again. In that - // case we will create a second entry for each store and store address. 
- SmallVector<const StoreInst *, 2> EHPtrStores; - SmallVector<const StoreInst *, 2> SelectorStores; - SmallVector<const Value *, 2> EHPtrStoreAddrs; - SmallVector<const Value *, 2> SelectorStoreAddrs; }; class WinEHCloningDirectorBase : public CloningDirector { public: - WinEHCloningDirectorBase(Function *HandlerFn, - FrameVarInfoMap &VarInfo, + WinEHCloningDirectorBase(Function *HandlerFn, FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) : Materializer(HandlerFn, VarInfo), SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), @@ -180,6 +196,9 @@ public: virtual CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) = 0; + virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) = 0; ValueMaterializer *getValueMaterializer() override { return &Materializer; } @@ -192,11 +211,13 @@ protected: class WinEHCatchDirector : public WinEHCloningDirectorBase { public: - WinEHCatchDirector(Function *CatchFn, Value *Selector, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + WinEHCatchDirector( + Function *CatchFn, Value *Selector, FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap, + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads) : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap), CurrentSelector(Selector->stripPointerCasts()), - ExceptionObjectVar(nullptr) {} + ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads) {} CloningAction handleBeginCatch(ValueToValueMapTy &VMap, const Instruction *Inst, @@ -210,21 +231,28 @@ public: BasicBlock *NewBB) override; CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; - const Value *getExceptionVar() { return ExceptionObjectVar; } + Value *getExceptionVar() { return ExceptionObjectVar; } TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } private: Value *CurrentSelector; - const Value *ExceptionObjectVar; + Value *ExceptionObjectVar; TinyPtrVector<BasicBlock *> ReturnTargets; + + // This will be a reference to the field of the same name in the WinEHPrepare + // object which instantiates this WinEHCatchDirector object. + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP; }; class WinEHCleanupDirector : public WinEHCloningDirectorBase { public: - WinEHCleanupDirector(Function *CleanupFn, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + WinEHCleanupDirector(Function *CleanupFn, FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap) : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {} CloningAction handleBeginCatch(ValueToValueMapTy &VMap, @@ -239,66 +267,9 @@ public: BasicBlock *NewBB) override; CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) override; -}; - -class ActionHandler { -public: - ActionHandler(BasicBlock *BB, ActionType Type) - : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {} - - ActionType getType() const { return Type; } - BasicBlock *getStartBlock() const { return StartBB; } - - bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; } - - void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; } - Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; } - -private: - BasicBlock *StartBB; - ActionType Type; - - // Can be either a BlockAddress or a Function depending on the EH personality. 
- Constant *HandlerBlockOrFunc; -}; - -class CatchHandler : public ActionHandler { -public: - CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB) - : ActionHandler(BB, ActionType::Catch), Selector(Selector), - NextBB(NextBB), ExceptionObjectVar(nullptr) {} - - // Method for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const ActionHandler *H) { - return H->getType() == ActionType::Catch; - } - - Constant *getSelector() const { return Selector; } - BasicBlock *getNextBB() const { return NextBB; } - - const Value *getExceptionVar() { return ExceptionObjectVar; } - TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } - - void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; } - void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) { - ReturnTargets = Targets; - } - -private: - Constant *Selector; - BasicBlock *NextBB; - const Value *ExceptionObjectVar; - TinyPtrVector<BasicBlock *> ReturnTargets; -}; - -class CleanupHandler : public ActionHandler { -public: - CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {} - - // Method for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const ActionHandler *H) { - return H->getType() == ActionType::Cleanup; - } + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; }; class LandingPadActions { @@ -313,6 +284,7 @@ public: bool includesCleanup() const { return HasCleanupHandlers; } + SmallVectorImpl<ActionHandler *> &actions() { return Actions; } SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } @@ -336,8 +308,8 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { // FIXME: Remove this once the backend can handle the prepared IR. static cl::opt<bool> -SEHPrepare("sehprepare", cl::Hidden, - cl::desc("Prepare functions with SEH personalities")); + SEHPrepare("sehprepare", cl::Hidden, + cl::desc("Prepare functions with SEH personalities")); bool WinEHPrepare::runOnFunction(Function &Fn) { SmallVector<LandingPadInst *, 4> LPads; @@ -360,6 +332,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { if (!isMSVCEHPersonality(Personality)) return false; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) { // Replace all resume instructions with unreachable. // FIXME: Remove this once the backend can handle the prepared IR. @@ -375,11 +349,11 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { return true; } -bool WinEHPrepare::doFinalization(Module &M) { - return false; -} +bool WinEHPrepare::doFinalization(Module &M) { return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +} bool WinEHPrepare::prepareExceptionHandlers( Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { @@ -422,9 +396,14 @@ bool WinEHPrepare::prepareExceptionHandlers( if (LPadHasActionList) continue; + // If either of the values in the aggregate returned by the landing pad is + // extracted and stored to memory, promote the stored value to a register. 
+ promoteLandingPadValues(LPad); + LandingPadActions Actions; mapLandingPadBlocks(LPad, Actions); + HandlersOutlined |= !Actions.actions().empty(); for (ActionHandler *Action : Actions) { if (Action->hasBeenProcessed()) continue; @@ -436,24 +415,17 @@ bool WinEHPrepare::prepareExceptionHandlers( if (isAsynchronousEHPersonality(Personality)) { if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { processSEHCatchHandler(CatchAction, StartBB); - HandlersOutlined = true; continue; } } - if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) { - HandlersOutlined = true; - } - } // End for each Action - - // FIXME: We need a guard against partially outlined functions. - if (!HandlersOutlined) - continue; + outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); + } // Replace the landing pad with a new llvm.eh.action based landing pad. BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB); assert(!isa<PHINode>(LPadBB->begin())); - Instruction *NewLPad = LPad->clone(); + auto *NewLPad = cast<LandingPadInst>(LPad->clone()); NewLPadBB->getInstList().push_back(NewLPad); while (!pred_empty(LPadBB)) { auto *pred = *pred_begin(LPadBB); @@ -461,6 +433,19 @@ bool WinEHPrepare::prepareExceptionHandlers( Invoke->setUnwindDest(NewLPadBB); } + // If anyone is still using the old landingpad value, just give them undef + // instead. The eh pointer and selector values are not real. + LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); + + // Replace the mapping of any nested landing pad that previously mapped + // to this landing pad with a referenced to the cloned version. + for (auto &LPadPair : NestedLPtoOriginalLP) { + const LandingPadInst *OriginalLPad = LPadPair.second; + if (OriginalLPad == LPad) { + LPadPair.second = NewLPad; + } + } + // Replace uses of the old lpad in phis with this block and delete the old // block. LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB); @@ -474,11 +459,17 @@ bool WinEHPrepare::prepareExceptionHandlers( if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); ActionArgs.push_back(CatchAction->getSelector()); + // Find the frame escape index of the exception object alloca in the + // parent. + int FrameEscapeIdx = -1; Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar()); - if (EHObj) - ActionArgs.push_back(EHObj); - else - ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType)); + if (EHObj && !isa<ConstantPointerNull>(EHObj)) { + auto I = FrameVarInfo.find(EHObj); + assert(I != FrameVarInfo.end() && + "failed to map llvm.eh.begincatch var"); + FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I); + } + ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx)); } else { ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); } @@ -502,6 +493,15 @@ bool WinEHPrepare::prepareExceptionHandlers( if (!HandlersOutlined) return false; + // Replace any nested landing pad stubs with the correct action handler. + // This must be done before we remove unreachable blocks because it + // cleans up references to outlined blocks that will be deleted. + for (auto &LPadPair : NestedLPtoOriginalLP) + completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo); + NestedLPtoOriginalLP.clear(); + + F.addFnAttr("wineh-parent", F.getName()); + // Delete any blocks that were only used by handlers that were outlined above. 
removeUnreachableBlocks(F); @@ -554,26 +554,26 @@ bool WinEHPrepare::prepareExceptionHandlers( ++InsertPt; ParentAlloca = new AllocaInst(ParentInst->getType(), nullptr, - ParentInst->getName() + ".reg2mem", InsertPt); + ParentInst->getName() + ".reg2mem", + AllocaInsertPt); new StoreInst(ParentInst, ParentAlloca, InsertPt); } else { - ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst); + ParentAlloca = DemoteRegToStack(*ParentInst, true, AllocaInsertPt); } } } - // If the parent alloca is no longer used and only one of the handlers used - // it, erase the parent and leave the copy in the outlined handler. - if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) { - ParentAlloca->eraseFromParent(); - continue; - } + // FIXME: We should try to sink unescaped allocas from the parent frame into + // the child frame. If the alloca is escaped, we have to use the lifetime + // markers to ensure that the alloca is only live within the child frame. // Add this alloca to the list of things to escape. AllocasToEscape.push_back(ParentAlloca); // Next replace all outlined allocas that are mapped to it. for (AllocaInst *TempAlloca : Allocas) { + if (TempAlloca == getCatchObjectSentinel()) + continue; // Skip catch parameter sentinels. Function *HandlerFn = TempAlloca->getParent()->getParent(); // FIXME: Sink this GEP into the blocks where it is used. Builder.SetInsertPoint(TempAlloca); @@ -601,19 +601,6 @@ bool WinEHPrepare::prepareExceptionHandlers( Builder.SetInsertPoint(&F.getEntryBlock().back()); Builder.CreateCall(FrameEscapeFn, AllocasToEscape); - // Insert an alloca for the EH state in the entry block. On x86, we will also - // insert stores to update the EH state, but on other ISAs, the runtime does - // it for us. - // FIXME: This record is different on x86. - Type *UnwindHelpTy = Type::getInt64Ty(Context); - AllocaInst *UnwindHelp = - new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front()); - Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp); - Function *UnwindHelpFn = - Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp); - Builder.CreateCall(UnwindHelpFn, - Builder.CreateBitCast(UnwindHelp, Int8PtrType)); - // Clean up the handler action maps we created for this function DeleteContainerSeconds(CatchHandlerMap); CatchHandlerMap.clear(); @@ -623,6 +610,125 @@ bool WinEHPrepare::prepareExceptionHandlers( return HandlersOutlined; } +void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { + // If the return values of the landing pad instruction are extracted and + // stored to memory, we want to promote the store locations to reg values. + SmallVector<AllocaInst *, 2> EHAllocas; + + // The landingpad instruction returns an aggregate value. Typically, its + // value will be passed to a pair of extract value instructions and the + // results of those extracts are often passed to store instructions. + // In unoptimized code the stored value will often be loaded and then stored + // again. + for (auto *U : LPad->users()) { + ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + + for (auto *EU : Extract->users()) { + if (auto *Store = dyn_cast<StoreInst>(EU)) { + auto *AV = cast<AllocaInst>(Store->getPointerOperand()); + EHAllocas.push_back(AV); + } + } + } + + // We can't do this without a dominator tree. + assert(DT); + + if (!EHAllocas.empty()) { + PromoteMemToReg(EHAllocas, *DT); + EHAllocas.clear(); + } + + // After promotion, some extracts may be trivially dead. Remove them. 
+  SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
+  for (auto *U : Users)
+    RecursivelyDeleteTriviallyDeadInstructions(U);
+}
+
+void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
+                                            LandingPadInst *OutlinedLPad,
+                                            const LandingPadInst *OriginalLPad,
+                                            FrameVarInfoMap &FrameVarInfo) {
+  // Get the nested block and erase the unreachable instruction that was
+  // temporarily inserted as its terminator.
+  LLVMContext &Context = ParentFn->getContext();
+  BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+  assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+  OutlinedBB->getTerminator()->eraseFromParent();
+  // That should leave OutlinedLPad as the last instruction in its block.
+  assert(&OutlinedBB->back() == OutlinedLPad);
+
+  // The original landing pad will have already had its action intrinsic
+  // built by the outlining loop. We need to clone that into the outlined
+  // location. It may also be necessary to add references to the exception
+  // variables to the outlined handler in which this landing pad is nested
+  // and remap return instructions in the nested handlers that should return
+  // to an address in the outlined handler.
+  Function *OutlinedHandlerFn = OutlinedBB->getParent();
+  BasicBlock::const_iterator II = OriginalLPad;
+  ++II;
+  // The instruction after the landing pad should now be a call to eh.actions.
+  const Instruction *Recover = II;
+  assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+  IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover->clone());
+
+  // Remap the exception variables into the outlined function.
+  WinEHFrameVariableMaterializer Materializer(OutlinedHandlerFn, FrameVarInfo);
+  SmallVector<BlockAddress *, 4> ActionTargets;
+  SmallVector<ActionHandler *, 4> ActionList;
+  parseEHActions(EHActions, ActionList);
+  for (auto *Action : ActionList) {
+    auto *Catch = dyn_cast<CatchHandler>(Action);
+    if (!Catch)
+      continue;
+    // The dyn_cast to function here selects C++ catch handlers and skips
+    // SEH catch handlers.
+    auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+    if (!Handler)
+      continue;
+    // Visit all the return instructions, looking for places that return
+    // to a location within OutlinedHandlerFn.
+    for (BasicBlock &NestedHandlerBB : *Handler) {
+      auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+      if (!Ret)
+        continue;
+
+      // Handler functions must always return a block address.
+      BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+      // The original target will have been in the main parent function,
+      // but if it is the address of a block that has been outlined, it
+      // should be a block that was outlined into OutlinedHandlerFn.
+      assert(BA->getFunction() == ParentFn);
+
+      // Ignore targets that aren't part of OutlinedHandlerFn.
+      if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+        continue;
+
+      // If the return value is the address of a block that we
+      // previously outlined into the parent handler function, replace
+      // the return instruction and add the mapped target to the list
+      // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()]; + assert(MappedBB->getParent() == OutlinedHandlerFn); + BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB); + Ret->eraseFromParent(); + ReturnInst::Create(Context, NewBA, &NestedHandlerBB); + ActionTargets.push_back(NewBA); + } + } + DeleteContainerPointers(ActionList); + ActionList.clear(); + OutlinedBB->getInstList().push_back(EHActions); + + // Insert an indirect branch into the outlined landing pad BB. + IndirectBrInst *IBr = IndirectBrInst::Create(EHActions, 0, OutlinedBB); + // Add the previously collected action targets. + for (auto *Target : ActionTargets) + IBr->addDestination(Target->getBasicBlock()); +} + // This function examines a block to determine whether the block ends with a // conditional branch to a catch handler based on a selector comparison. // This function is used both by the WinEHPrepare::findSelectorComparison() and @@ -657,6 +763,59 @@ static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, return false; } +static BasicBlock *createStubLandingPad(Function *Handler, + Value *PersonalityFn) { + // FIXME: Finish this! + LLVMContext &Context = Handler->getContext(); + BasicBlock *StubBB = BasicBlock::Create(Context, "stub"); + Handler->getBasicBlockList().push_back(StubBB); + IRBuilder<> Builder(StubBB); + LandingPadInst *LPad = Builder.CreateLandingPad( + llvm::StructType::get(Type::getInt8PtrTy(Context), + Type::getInt32Ty(Context), nullptr), + PersonalityFn, 0); + LPad->setCleanup(true); + Builder.CreateUnreachable(); + return StubBB; +} + +// Cycles through the blocks in an outlined handler function looking for an +// invoke instruction and inserts an invoke of llvm.donothing with an empty +// landing pad if none is found. The code that generates the .xdata tables for +// the handler needs at least one landing pad to identify the parent function's +// personality. +void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler, + Value *PersonalityFn) { + ReturnInst *Ret = nullptr; + for (BasicBlock &BB : *Handler) { + TerminatorInst *Terminator = BB.getTerminator(); + // If we find an invoke, there is nothing to be done. + auto *II = dyn_cast<InvokeInst>(Terminator); + if (II) + return; + // If we've already recorded a return instruction, keep looking for invokes. + if (Ret) + continue; + // If we haven't recorded a return instruction yet, try this terminator. + Ret = dyn_cast<ReturnInst>(Terminator); + } + + // If we got this far, the handler contains no invokes. We should have seen + // at least one return. We'll insert an invoke of llvm.donothing ahead of + // that return. + assert(Ret); + BasicBlock *OldRetBB = Ret->getParent(); + BasicBlock *NewRetBB = SplitBlock(OldRetBB, Ret); + // SplitBlock adds an unconditional branch instruction at the end of the + // parent block. We want to replace that with an invoke call, so we can + // erase it now. 
+ OldRetBB->getTerminator()->eraseFromParent(); + BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn); + Function *F = + Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); + InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); +} + bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, LandingPadInst *LPad, BasicBlock *StartBB, FrameVarInfoMap &VarInfo) { @@ -680,6 +839,8 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, SrcFn->getName() + ".cleanup", M); } + Handler->addFnAttr("wineh-parent", SrcFn->getName()); + // Generate a standard prolog to setup the frame recovery structure. IRBuilder<> Builder(Context); BasicBlock *Entry = BasicBlock::Create(Context, "entry"); @@ -696,10 +857,14 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, LPadMap.mapLandingPad(LPad); if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { Constant *Sel = CatchAction->getSelector(); - Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap)); - LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1)); + Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap, + NestedLPtoOriginalLP)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + ConstantInt::get(Type::getInt32Ty(Context), 1)); } else { Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + UndefValue::get(Type::getInt32Ty(Context))); } SmallVector<ReturnInst *, 8> Returns; @@ -735,12 +900,45 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList()); FirstClonedBB->eraseFromParent(); + // Make sure we can identify the handler's personality later. + addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn()); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { WinEHCatchDirector *CatchDirector = reinterpret_cast<WinEHCatchDirector *>(Director.get()); CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); - } + + // Look for blocks that are not part of the landing pad that we just + // outlined but terminate with a call to llvm.eh.endcatch and a + // branch to a block that is in the handler we just outlined. + // These blocks will be part of a nested landing pad that intends to + // return to an address in this handler. This case is best handled + // after both landing pads have been outlined, so for now we'll just + // save the association of the blocks in LPadTargetBlocks. The + // return instructions which are created from these branches will be + // replaced after all landing pads have been outlined. + for (const auto MapEntry : VMap) { + // VMap maps all values and blocks that were just cloned, but dead + // blocks which were pruned will map to nullptr. 
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr) + continue; + const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first); + for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) { + auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1) + continue; + BasicBlock::iterator II = const_cast<BranchInst *>(Branch); + --II; + if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) { + // This would indicate that a nested landing pad wants to return + // to a block that is outlined into two different handlers. + assert(!LPadTargetBlocks.count(MappedBB)); + LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second); + } + } + } + } // End if (CatchAction) Action->setHandlerBlockOrFunc(Handler); @@ -787,9 +985,8 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { // The landingpad instruction returns an aggregate value. Typically, its // value will be passed to a pair of extract value instructions and the - // results of those extracts are often passed to store instructions. - // In unoptimized code the stored value will often be loaded and then stored - // again. + // results of those extracts will have been promoted to reg values before + // this routine is called. for (auto *U : LPad->users()) { const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); if (!Extract) @@ -800,36 +997,17 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { assert((Idx == 0 || Idx == 1) && "Unexpected operation: extracting an unknown landing pad element"); if (Idx == 0) { - // Element 0 doesn't directly corresponds to anything in the WinEH - // scheme. - // It will be stored to a memory location, then later loaded and finally - // the loaded value will be used as the argument to an - // llvm.eh.begincatch - // call. We're tracking it here so that we can skip the store and load. ExtractedEHPtrs.push_back(Extract); } else if (Idx == 1) { - // Element 1 corresponds to the filter selector. We'll map it to 1 for - // matching purposes, but it will also probably be stored to memory and - // reloaded, so we need to track the instuction so that we can map the - // loaded value too. ExtractedSelectors.push_back(Extract); } - - // Look for stores of the extracted values. - for (auto *EU : Extract->users()) { - if (auto *Store = dyn_cast<StoreInst>(EU)) { - if (Idx == 1) { - SelectorStores.push_back(Store); - SelectorStoreAddrs.push_back(Store->getPointerOperand()); - } else { - EHPtrStores.push_back(Store); - EHPtrStoreAddrs.push_back(Store->getPointerOperand()); - } - } - } } } +bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { + return BB->getLandingPadInst() == OriginLPad; +} + bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { if (Inst == OriginLPad) return true; @@ -841,47 +1019,16 @@ bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { if (Inst == Extract) return true; } - for (auto *Store : EHPtrStores) { - if (Inst == Store) - return true; - } - for (auto *Store : SelectorStores) { - if (Inst == Store) - return true; - } - return false; } -void LandingPadMap::remapSelector(ValueToValueMapTy &VMap, - Value *MappedValue) const { - // Remap all selector extract instructions to the specified value. +void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const { + // Remap all landing pad extract instructions to the specified values. 
+ for (auto *Extract : ExtractedEHPtrs) + VMap[Extract] = EHPtrValue; for (auto *Extract : ExtractedSelectors) - VMap[Extract] = MappedValue; -} - -bool LandingPadMap::mapIfEHLoad(const LoadInst *Load, - SmallVectorImpl<const StoreInst *> &Stores, - SmallVectorImpl<const Value *> &StoreAddrs) { - // This makes the assumption that a store we've previously seen dominates - // this load instruction. That might seem like a rather huge assumption, - // but given the way that landingpads are constructed its fairly safe. - // FIXME: Add debug/assert code that verifies this. - const Value *LoadAddr = Load->getPointerOperand(); - for (auto *StoreAddr : StoreAddrs) { - if (LoadAddr == StoreAddr) { - // Handle the common debug scenario where this loaded value is stored - // to a different location. - for (auto *U : Load->users()) { - if (auto *Store = dyn_cast<StoreInst>(U)) { - Stores.push_back(Store); - StoreAddrs.push_back(Store->getPointerOperand()); - } - } - return true; - } - } - return false; + VMap[Extract] = SelectorValue; } CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( @@ -891,40 +1038,13 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( if (LPadMap.isLandingPadSpecificInst(Inst)) return CloningDirector::SkipInstruction; - if (auto *Load = dyn_cast<LoadInst>(Inst)) { - // Look for loads of (previously suppressed) landingpad values. - // The EHPtr load can be mapped to an undef value as it should only be used - // as an argument to llvm.eh.begincatch, but the selector value needs to be - // mapped to a constant value of 1. This value will be used to simplify the - // branching to always flow to the current handler. - if (LPadMap.mapIfSelectorLoad(Load)) { - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - return CloningDirector::SkipInstruction; - } - if (LPadMap.mapIfEHPtrLoad(Load)) { - VMap[Inst] = UndefValue::get(Int8PtrType); - return CloningDirector::SkipInstruction; - } - - // Any other loads just get cloned. - return CloningDirector::CloneInstruction; - } - // Nested landing pads will be cloned as stubs, with just the // landingpad instruction and an unreachable instruction. When // all landingpads have been outlined, we'll replace this with the // llvm.eh.actions call and indirect branch created when the // landing pad was outlined. - if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) { - Instruction *NewInst = NestedLPad->clone(); - if (NestedLPad->hasName()) - NewInst->setName(NestedLPad->getName()); - // FIXME: Store this mapping somewhere else also. - VMap[NestedLPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; + if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) { + return handleLandingPad(VMap, LPad, NewBB); } if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) @@ -944,6 +1064,20 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( return CloningDirector::CloneInstruction; } +CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + Instruction *NewInst = LPad->clone(); + if (LPad->hasName()) + NewInst->setName(LPad->getName()); + // Save this correlation for later processing. 
+ NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad; + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { // The argument to the call is some form of the first element of the @@ -958,6 +1092,11 @@ CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( "llvm.eh.begincatch found while " "outlining catch handler."); ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExceptionObjectVar)) + return CloningDirector::SkipInstruction; + assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() && + "catch parameter is not static alloca"); + Materializer.escapeCatchObject(ExceptionObjectVar); return CloningDirector::SkipInstruction; } @@ -971,27 +1110,32 @@ WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, // The end catch call can occur in one of two places: either in a // landingpad block that is part of the catch handlers exception mechanism, - // or at the end of the catch block. If it occurs in a landing pad, we must - // skip it and continue so that the landing pad gets cloned. - // FIXME: This case isn't fully supported yet and shouldn't turn up in any - // of the test cases until it is. - if (IntrinCall->getParent()->isLandingPad()) + // or at the end of the catch block. However, a catch-all handler may call + // end catch from the original landing pad. If the call occurs in a nested + // landing pad block, we must skip it and continue so that the landing pad + // gets cloned. + auto *ParentBB = IntrinCall->getParent(); + if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) return CloningDirector::SkipInstruction; - // If an end catch occurs anywhere else the next instruction should be an - // unconditional branch instruction that we want to replace with a return - // to the the address of the branch target. - const BasicBlock *EndCatchBB = IntrinCall->getParent(); - const TerminatorInst *Terminator = EndCatchBB->getTerminator(); - const BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - assert(Branch && Branch->isUnconditional()); - assert(std::next(BasicBlock::const_iterator(IntrinCall)) == - BasicBlock::const_iterator(Branch)); - - BasicBlock *ContinueLabel = Branch->getSuccessor(0); - ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel), - NewBB); - ReturnTargets.push_back(ContinueLabel); + // If an end catch occurs anywhere else we want to terminate the handler + // with a return to the code that follows the endcatch call. If the + // next instruction is not an unconditional branch, we need to split the + // block to provide a clear target for the return instruction. + BasicBlock *ContinueBB; + auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); + const BranchInst *Branch = dyn_cast<BranchInst>(Next); + if (!Branch || !Branch->isUnconditional()) { + // We're interrupting the cloning process at this location, so the + // const_cast we're doing here will not cause a problem. 
+ ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB), + const_cast<Instruction *>(cast<Instruction>(Next))); + } else { + ContinueBB = Branch->getSuccessor(0); + } + + ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); + ReturnTargets.push_back(ContinueBB); // We just added a terminator to the cloned block. // Tell the caller to stop processing the current basic block so that @@ -1029,6 +1173,20 @@ WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, return CloningDirector::StopCloningBB; } +CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // The MS runtime will terminate the process if an exception occurs in a + // cleanup handler, so we shouldn't encounter landing pads in the actual + // cleanup code, but they may appear in catch blocks. Depending on where + // we started cloning we may see one, but it will get dropped during dead + // block pruning. + Instruction *NewInst = new UnreachableInst(NewBB->getContext()); + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + return CloningDirector::StopCloningBB; +} + CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { // Catch blocks within cleanup handlers will always be unreachable. @@ -1041,12 +1199,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Catch blocks within cleanup handlers will always be unreachable. - // We'll insert an unreachable instruction now, but it will be pruned - // before the cloning process is complete. - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; + // Cleanup handlers nested within catch handlers may begin with a call to + // eh.endcatch. We can just ignore that instruction. + return CloningDirector::SkipInstruction; } CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( @@ -1080,6 +1235,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( NewCall->setDebugLoc(Invoke->getDebugLoc()); VMap[Invoke] = NewCall; + // Remap the operands. + llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer); + // Insert an unconditional branch to the normal destination. BranchInst::Create(Invoke->getNormalDest(), NewBB); @@ -1088,7 +1246,7 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( // We just added a terminator to the cloned block. // Tell the caller to stop processing the current basic block. 
- return CloningDirector::StopCloningBB; + return CloningDirector::CloneSuccessors; } CloningDirector::CloningAction WinEHCleanupDirector::handleResume( @@ -1104,7 +1262,8 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleResume( WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo) : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { - Builder.SetInsertPoint(&OutlinedFn->getEntryBlock()); + BasicBlock *EntryBB = &OutlinedFn->getEntryBlock(); + Builder.SetInsertPoint(EntryBB, EntryBB->getFirstInsertionPt()); } Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { @@ -1139,6 +1298,15 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { return nullptr; } +void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { + // Catch parameter objects have to live in the parent frame. When we see a use + // of a catch parameter, add a sentinel to the multimap to indicate that it's + // used from another handler. This will prevent us from trying to sink the + // alloca into the handler and ensure that the catch parameter is present in + // the call to llvm.frameescape. + FrameVarInfo[V].push_back(getCatchObjectSentinel()); +} + // This function maps the catch and cleanup handlers that are reachable from the // specified landing pad. The landing pad sequence will have this basic shape: // @@ -1176,13 +1344,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); if (NumClauses == 0) { - // This landing pad contains only cleanup code. - CleanupHandler *Action = new CleanupHandler(BB); - CleanupHandlerMap[BB] = Action; - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName() - << "\n"); - assert(LPad->isCleanup()); + findCleanupHandlers(Actions, BB, nullptr); return; } @@ -1202,14 +1364,8 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, // exceptions but code called from catches can. For SEH, it isn't // important if some finally code before a catch-all is executed out of // line or after recovering from the exception. - if (Personality == EHPersonality::MSVC_CXX) { - if (auto *CleanupAction = findCleanupHandler(BB, BB)) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } - } + if (Personality == EHPersonality::MSVC_CXX) + findCleanupHandlers(Actions, BB, BB); // Add the catch handler to the action list. CatchHandler *Action = @@ -1226,13 +1382,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); // See if there is any interesting code executed before the dispatch. - if (auto *CleanupAction = - findCleanupHandler(BB, CatchAction->getStartBlock())) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } + findCleanupHandlers(Actions, BB, CatchAction->getStartBlock()); assert(CatchAction); ++HandlersFound; @@ -1248,12 +1398,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, // If we didn't wind up in a catch-all, see if there is any interesting code // executed before the resume. 
- if (auto *CleanupAction = findCleanupHandler(BB, BB)) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } + findCleanupHandlers(Actions, BB, BB); // It's possible that some optimization moved code into a landingpad that // wasn't @@ -1313,20 +1458,56 @@ CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, return nullptr; } -// These are helper functions to combine repeated code from findCleanupHandler. -static CleanupHandler *createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, - BasicBlock *BB) { +// These are helper functions to combine repeated code from findCleanupHandlers. +static void createCleanupHandler(LandingPadActions &Actions, + CleanupHandlerMapTy &CleanupHandlerMap, + BasicBlock *BB) { CleanupHandler *Action = new CleanupHandler(BB); CleanupHandlerMap[BB] = Action; - return Action; + Actions.insertCleanupHandler(Action); + DEBUG(dbgs() << " Found cleanup code in block " + << Action->getStartBlock()->getName() << "\n"); +} + +static bool isFrameAddressCall(Value *V) { + return match(V, m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +} + +static CallSite matchOutlinedFinallyCall(BasicBlock *BB, + Instruction *MaybeCall) { + // Look for finally blocks that Clang has already outlined for us. + // %fp = call i8* @llvm.frameaddress(i32 0) + // call void @"fin$parent"(iN 1, i8* %fp) + if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) + MaybeCall = MaybeCall->getNextNode(); + CallSite FinallyCall(MaybeCall); + if (!FinallyCall || FinallyCall.arg_size() != 2) + return CallSite(); + if (!match(FinallyCall.getArgument(0), m_SpecificInt(1))) + return CallSite(); + if (!isFrameAddressCall(FinallyCall.getArgument(1))) + return CallSite(); + return FinallyCall; +} + +static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) { + // Skip single ubr blocks. + while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) { + auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); + if (Br && Br->isUnconditional()) + BB = Br->getSuccessor(0); + else + return BB; + } + return BB; } // This function searches starting with the input block for the next block that // contains code that is not part of a catch handler and would not be eliminated // during handler outlining. // -CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, - BasicBlock *EndBB) { +void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions, + BasicBlock *StartBB, BasicBlock *EndBB) { // Here we will skip over the following: // // landing pad prolog: @@ -1343,6 +1524,7 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // Anything other than an unconditional branch will kick us out of this loop // one way or another. while (BB) { + BB = followSingleUnconditionalBranches(BB); // If we've already scanned this block, don't scan it again. If it is // a cleanup block, there will be an action in the CleanupHandlerMap. // If we've scanned it and it is not a cleanup block, there will be a @@ -1351,7 +1533,12 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // avoid creating a null entry for blocks we haven't scanned. 
if (CleanupHandlerMap.count(BB)) { if (auto *Action = CleanupHandlerMap[BB]) { - return cast<CleanupHandler>(Action); + Actions.insertCleanupHandler(Action); + DEBUG(dbgs() << " Found cleanup code in block " + << Action->getStartBlock()->getName() << "\n"); + // FIXME: This cleanup might chain into another, and we need to discover + // that. + return; } else { // Here we handle the case where the cleanup handler map contains a // value for this block but the value is a nullptr. This means that @@ -1363,11 +1550,9 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // would terminate the search for cleanup code, so the unconditional // branch is the only case for which we might need to continue // searching. - if (BB == EndBB) - return nullptr; - BasicBlock *SuccBB; - if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB))) - return nullptr; + BasicBlock *SuccBB = followSingleUnconditionalBranches(BB); + if (SuccBB == BB || SuccBB == EndBB) + return; BB = SuccBB; continue; } @@ -1390,26 +1575,23 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, } // Look for the bare resume pattern: - // %exn2 = load i8** %exn.slot - // %sel2 = load i32* %ehselector.slot - // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0 - // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1 + // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0 + // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1 // resume { i8*, i32 } %lpad.val2 if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) { InsertValueInst *Insert1 = nullptr; InsertValueInst *Insert2 = nullptr; Value *ResumeVal = Resume->getOperand(0); - // If there is only one landingpad, we may use the lpad directly with no - // insertions. - if (isa<LandingPadInst>(ResumeVal)) - return nullptr; - if (!isa<PHINode>(ResumeVal)) { + // If the resume value isn't a phi or landingpad value, it should be a + // series of insertions. Identify them so we can avoid them when scanning + // for cleanups. + if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) { Insert2 = dyn_cast<InsertValueInst>(ResumeVal); if (!Insert2) - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand()); if (!Insert1) - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); } for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); II != IE; ++II) { @@ -1420,66 +1602,133 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, continue; if (!Inst->hasOneUse() || (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) { - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); } } - return nullptr; + return; } BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - if (Branch) { - if (Branch->isConditional()) { - // Look for the selector dispatch. 
- // %sel = load i32* %ehselector.slot - // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) - // %matches = icmp eq i32 %sel12, %2 - // br i1 %matches, label %catch14, label %eh.resume - CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); - if (!Compare || !Compare->isEquality()) - return createCleanupHandler(CleanupHandlerMap, BB); - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), - IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Compare || Inst == Branch) - continue; - if (!Inst->hasOneUse() || (Inst->user_back() != Compare)) - return createCleanupHandler(CleanupHandlerMap, BB); - if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) - continue; - if (!isa<LoadInst>(Inst)) - return createCleanupHandler(CleanupHandlerMap, BB); - } - // The selector dispatch block should always terminate our search. - assert(BB == EndBB); - return nullptr; - } else { - // Look for empty blocks with unconditional branches. - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), - IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Branch) - continue; - if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) - continue; - // Anything else makes this interesting cleanup code. - return createCleanupHandler(CleanupHandlerMap, BB); + if (Branch && Branch->isConditional()) { + // Look for the selector dispatch. + // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) + // %matches = icmp eq i32 %sel, %2 + // br i1 %matches, label %catch14, label %eh.resume + CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); + if (!Compare || !Compare->isEquality()) + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + if (Inst == Compare || Inst == Branch) + continue; + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + continue; + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + } + // The selector dispatch block should always terminate our search. + assert(BB == EndBB); + return; + } + + if (isAsynchronousEHPersonality(Personality)) { + // If this is a landingpad block, split the block at the first non-landing + // pad instruction. + Instruction *MaybeCall = BB->getFirstNonPHIOrDbg(); + if (LPadMap) { + while (MaybeCall != BB->getTerminator() && + LPadMap->isLandingPadSpecificInst(MaybeCall)) + MaybeCall = MaybeCall->getNextNode(); + } + + // Look for outlined finally calls. + if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) { + Function *Fin = FinallyCall.getCalledFunction(); + assert(Fin && "outlined finally call should be direct"); + auto *Action = new CleanupHandler(BB); + Action->setHandlerBlockOrFunc(Fin); + Actions.insertCleanupHandler(Action); + CleanupHandlerMap[BB] = Action; + DEBUG(dbgs() << " Found frontend-outlined finally call to " + << Fin->getName() << " in block " + << Action->getStartBlock()->getName() << "\n"); + + // Split the block if there were more interesting instructions and look + // for finally calls in the normal successor block. 
+ BasicBlock *SuccBB = BB; + if (FinallyCall.getInstruction() != BB->getTerminator() && + FinallyCall.getInstruction()->getNextNode() != BB->getTerminator()) { + SuccBB = BB->splitBasicBlock(FinallyCall.getInstruction()->getNextNode()); + } else { + if (FinallyCall.isInvoke()) { + SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest(); + } else { + SuccBB = BB->getUniqueSuccessor(); + assert(SuccBB && "splitOutlinedFinallyCalls didn't insert a branch"); + } } + BB = SuccBB; if (BB == EndBB) - return nullptr; - // The branch was unconditional. - BB = Branch->getSuccessor(0); + return; + continue; + } + } + + // Anything else is either a catch block or interesting cleanup code. + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + // Unconditional branches fall through to this loop. + if (Inst == Branch) continue; - } // End else of if branch was conditional - } // End if Branch + // If this is a catch block, there is no cleanup code to be found. + if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) + return; + // If this is a nested landing pad, it may contain an endcatch call. + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + return; + // Anything else makes this interesting cleanup code. + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + } - // Anything else makes this interesting cleanup code. - return createCleanupHandler(CleanupHandlerMap, BB); + // Only unconditional branches in empty blocks should get this far. + assert(Branch && Branch->isUnconditional()); + if (BB == EndBB) + return; + BB = Branch->getSuccessor(0); } - return nullptr; +} + +// This is a public function, declared in WinEHFuncInfo.h and is also +// referenced by WinEHNumbering in FunctionLoweringInfo.cpp. 
+void llvm::parseEHActions(const IntrinsicInst *II, + SmallVectorImpl<ActionHandler *> &Actions) { + for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) { + uint64_t ActionKind = + cast<ConstantInt>(II->getArgOperand(I))->getZExtValue(); + if (ActionKind == /*catch=*/1) { + auto *Selector = cast<Constant>(II->getArgOperand(I + 1)); + ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2)); + int64_t EHObjIndexVal = EHObjIndex->getSExtValue(); + Constant *Handler = cast<Constant>(II->getArgOperand(I + 3)); + I += 4; + auto *CH = new CatchHandler(/*BB=*/nullptr, Selector, /*NextBB=*/nullptr); + CH->setHandlerBlockOrFunc(Handler); + CH->setExceptionVarIndex(EHObjIndexVal); + Actions.push_back(CH); + } else if (ActionKind == 0) { + Constant *Handler = cast<Constant>(II->getArgOperand(I + 1)); + I += 2; + auto *CH = new CleanupHandler(/*BB=*/nullptr); + CH->setHandlerBlockOrFunc(Handler); + Actions.push_back(CH); + } else { + llvm_unreachable("Expected either a catch or cleanup handler!"); + } + } + std::reverse(Actions.begin(), Actions.end()); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 9f56214..7846529 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -197,8 +197,7 @@ public: DataAlignmentFactor(DataAlignmentFactor), ReturnAddressRegister(ReturnAddressRegister) {} - ~CIE() { - } + ~CIE() override {} uint64_t getCodeAlignmentFactor() const { return CodeAlignmentFactor; } int64_t getDataAlignmentFactor() const { return DataAlignmentFactor; } @@ -245,8 +244,7 @@ public: InitialLocation(InitialLocation), AddressRange(AddressRange), LinkedCIE(Cie) {} - ~FDE() { - } + ~FDE() override {} CIE *getLinkedCIE() const { return LinkedCIE; } diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index b14af07..0aff327 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -48,9 +48,10 @@ public: reset(); } - uint32_t getChildCount() const { return Args.size(); } + uint32_t getChildCount() const override { return Args.size(); } - std::unique_ptr<PDBSymbolData> getChildAtIndex(uint32_t Index) const { + std::unique_ptr<PDBSymbolData> + getChildAtIndex(uint32_t Index) const override { if (Index >= Args.size()) return nullptr; @@ -58,7 +59,7 @@ public: Args[Index]->getSymIndexId()); } - std::unique_ptr<PDBSymbolData> getNext() { + std::unique_ptr<PDBSymbolData> getNext() override { if (CurIter == Args.end()) return nullptr; const auto &Result = **CurIter; @@ -66,9 +67,9 @@ public: return Session.getConcreteSymbolById<PDBSymbolData>(Result.getSymIndexId()); } - void reset() { CurIter = Args.empty() ? Args.end() : Args.begin(); } + void reset() override { CurIter = Args.empty() ? 
Args.end() : Args.begin(); } - FunctionArgEnumerator *clone() const { + FunctionArgEnumerator *clone() const override { return new FunctionArgEnumerator(Session, Func); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 8018206..af3563f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -34,25 +34,27 @@ public: std::unique_ptr<ArgEnumeratorType> ArgEnumerator) : Session(PDBSession), Enumerator(std::move(ArgEnumerator)) {} - uint32_t getChildCount() const { return Enumerator->getChildCount(); } + uint32_t getChildCount() const override { + return Enumerator->getChildCount(); + } - std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const { + std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override { auto FunctionArgSymbol = Enumerator->getChildAtIndex(Index); if (!FunctionArgSymbol) return nullptr; return Session.getSymbolById(FunctionArgSymbol->getTypeId()); } - std::unique_ptr<PDBSymbol> getNext() { + std::unique_ptr<PDBSymbol> getNext() override { auto FunctionArgSymbol = Enumerator->getNext(); if (!FunctionArgSymbol) return nullptr; return Session.getSymbolById(FunctionArgSymbol->getTypeId()); } - void reset() { Enumerator->reset(); } + void reset() override { Enumerator->reset(); } - MyType *clone() const { + MyType *clone() const override { std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone()); return new FunctionArgEnumerator(Session, std::move(Clone)); } diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h deleted file mode 100644 index 6453099..0000000 --- a/lib/ExecutionEngine/EventListenerCommon.h +++ /dev/null @@ -1,68 +0,0 @@ -//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Common functionality for JITEventListener implementations -// -//===----------------------------------------------------------------------===// - -#ifndef EVENT_LISTENER_COMMON_H -#define EVENT_LISTENER_COMMON_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Path.h" - -namespace llvm { - -namespace jitprofiling { - -class FilenameCache { - // Holds the filename of each Scope, so that we can pass a null-terminated - // string into oprofile. - DenseMap<const MDNode *, std::string> Filenames; - DenseMap<const MDNode *, std::string> Paths; - - public: - const char *getFilename(MDNode *Scope) { - assert(Scope->isResolved() && "Expected Scope to be resolved"); - std::string &Filename = Filenames[Scope]; - if (Filename.empty()) { - DIScope DIScope(Scope); - Filename = DIScope.getFilename(); - } - return Filename.c_str(); - } - - const char *getFullPath(MDNode *Scope) { - assert(Scope->isResolved() && "Expected Scope to be resolved"); - std::string &P = Paths[Scope]; - if (P.empty()) { - DIScope DIScope(Scope); - StringRef DirName = DIScope.getDirectory(); - StringRef FileName = DIScope.getFilename(); - SmallString<256> FullPath; - if (DirName != "." 
&& DirName != "") { - FullPath = DirName; - } - if (FileName != "") { - sys::path::append(FullPath, FileName); - } - P = FullPath.str(); - } - return P.c_str(); - } -}; - -} // namespace jitprofiling - -} // namespace llvm - -#endif //EVENT_LISTENER_COMMON_H diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index c586ba7..d7038fd 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -18,9 +18,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" @@ -45,11 +47,13 @@ STATISTIC(NumGlobals , "Number of global vars initialized"); ExecutionEngine *(*ExecutionEngine::MCJITCtor)( std::unique_ptr<Module> M, std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MCJMM, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) = nullptr; ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)( - std::string *ErrorStr, std::unique_ptr<RTDyldMemoryManager> OrcJMM, + std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) = nullptr; ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M, @@ -58,8 +62,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M, void JITEventListener::anchor() {} ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M) - : EEState(*this), - LazyFunctionCreator(nullptr) { + : LazyFunctionCreator(nullptr) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -151,38 +154,52 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { } -void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { - GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap); - void *OldVal; +uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) { + GlobalAddressMapTy::iterator I = GlobalAddressMap.find(Name); + uint64_t OldVal; // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the // GlobalAddressMap. 
if (I == GlobalAddressMap.end()) - OldVal = nullptr; + OldVal = 0; else { + GlobalAddressReverseMap.erase(I->second); OldVal = I->second; GlobalAddressMap.erase(I); } - GlobalAddressReverseMap.erase(OldVal); return OldVal; } +std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { + MutexGuard locked(lock); + Mangler Mang(DL); + SmallString<128> FullName; + Mang.getNameWithPrefix(FullName, GV->getName()); + return FullName.str(); +} + void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); + addGlobalMapping(getMangledName(GV), (uint64_t) Addr); +} + +void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { + MutexGuard locked(lock); + + assert(!Name.empty() && "Empty GlobalMapping symbol name!"); - DEBUG(dbgs() << "JIT: Map \'" << GV->getName() - << "\' to [" << Addr << "]\n";); - void *&CurVal = EEState.getGlobalAddressMap()[GV]; + DEBUG(dbgs() << "JIT: Map \'" << Name << "\' to [" << Addr << "]\n";); + uint64_t &CurVal = EEState.getGlobalAddressMap()[Name]; assert((!CurVal || !Addr) && "GlobalMapping already established!"); CurVal = Addr; // If we are using the reverse mapping, add it too. if (!EEState.getGlobalAddressReverseMap().empty()) { - AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap()[Addr]; - assert((!V || !GV) && "GlobalMapping already established!"); - V = GV; + std::string &V = EEState.getGlobalAddressReverseMap()[CurVal]; + assert((!V.empty() || !Name.empty()) && + "GlobalMapping already established!"); + V = Name; } } @@ -197,13 +214,19 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - EEState.RemoveMapping(FI); + EEState.RemoveMapping(getMangledName(FI)); for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) - EEState.RemoveMapping(GI); + EEState.RemoveMapping(getMangledName(GI)); +} + +uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, + void *Addr) { + MutexGuard locked(lock); + return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr); } -void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { +uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = @@ -211,10 +234,10 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { // Deleting from the mapping? if (!Addr) - return EEState.RemoveMapping(GV); + return EEState.RemoveMapping(Name); - void *&CurVal = Map[GV]; - void *OldVal = CurVal; + uint64_t &CurVal = Map[Name]; + uint64_t OldVal = CurVal; if (CurVal && !EEState.getGlobalAddressReverseMap().empty()) EEState.getGlobalAddressReverseMap().erase(CurVal); @@ -222,20 +245,35 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { // If we are using the reverse mapping, add it too. 
if (!EEState.getGlobalAddressReverseMap().empty()) { - AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap()[Addr]; - assert((!V || !GV) && "GlobalMapping already established!"); - V = GV; + std::string &V = EEState.getGlobalAddressReverseMap()[CurVal]; + assert((!V.empty() || !Name.empty()) && + "GlobalMapping already established!"); + V = Name; } return OldVal; } -void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { +uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { MutexGuard locked(lock); - + uint64_t Address = 0; ExecutionEngineState::GlobalAddressMapTy::iterator I = - EEState.getGlobalAddressMap().find(GV); - return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr; + EEState.getGlobalAddressMap().find(S); + if (I != EEState.getGlobalAddressMap().end()) + Address = I->second; + return Address; +} + + +void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) { + MutexGuard locked(lock); + if (void* Address = (void *) getAddressToGlobalIfAvailable(S)) + return Address; + return nullptr; +} + +void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { + MutexGuard locked(lock); + return getPointerToGlobalIfAvailable(getMangledName(GV)); } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { @@ -244,15 +282,25 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { for (ExecutionEngineState::GlobalAddressMapTy::iterator - I = EEState.getGlobalAddressMap().begin(), - E = EEState.getGlobalAddressMap().end(); I != E; ++I) + I = EEState.getGlobalAddressMap().begin(), + E = EEState.getGlobalAddressMap().end(); I != E; ++I) { + StringRef Name = I->first(); + uint64_t Addr = I->second; EEState.getGlobalAddressReverseMap().insert(std::make_pair( - I->second, I->first)); + Addr, Name)); + } } - std::map<void *, AssertingVH<const GlobalValue> >::iterator I = - EEState.getGlobalAddressReverseMap().find(Addr); - return I != EEState.getGlobalAddressReverseMap().end() ? I->second : nullptr; + std::map<uint64_t, std::string>::iterator I = + EEState.getGlobalAddressReverseMap().find((uint64_t) Addr); + + if (I != EEState.getGlobalAddressReverseMap().end()) { + StringRef Name = I->second; + for (unsigned i = 0, e = Modules.size(); i != e; ++i) + if (GlobalValue *GV = Modules[i]->getNamedValue(Name)) + return GV; + } + return nullptr; } namespace { @@ -404,8 +452,9 @@ EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {} EngineBuilder::EngineBuilder(std::unique_ptr<Module> M) : M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr), - OptLevel(CodeGenOpt::Default), MCJMM(nullptr), RelocModel(Reloc::Default), - CMModel(CodeModel::JITDefault), UseOrcMCJITReplacement(false) { + OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr), + RelocModel(Reloc::Default), CMModel(CodeModel::JITDefault), + UseOrcMCJITReplacement(false) { // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. 
#ifndef NDEBUG @@ -419,7 +468,21 @@ EngineBuilder::~EngineBuilder() = default; EngineBuilder &EngineBuilder::setMCJITMemoryManager( std::unique_ptr<RTDyldMemoryManager> mcjmm) { - MCJMM = std::move(mcjmm); + auto SharedMM = std::shared_ptr<RTDyldMemoryManager>(std::move(mcjmm)); + MemMgr = SharedMM; + Resolver = SharedMM; + return *this; +} + +EngineBuilder& +EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); + return *this; +} + +EngineBuilder& +EngineBuilder::setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR) { + Resolver = std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(SR)); return *this; } @@ -434,7 +497,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (MCJMM) { + if (MemMgr) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -456,12 +519,13 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { ExecutionEngine *EE = nullptr; if (ExecutionEngine::OrcMCJITReplacementCtor && UseOrcMCJITReplacement) { - EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MCJMM), + EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MemMgr), + std::move(Resolver), std::move(TheTM)); EE->addModule(std::move(M)); } else if (ExecutionEngine::MCJITCtor) - EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MCJMM), - std::move(TheTM)); + EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MemMgr), + std::move(Resolver), std::move(TheTM)); if (EE) { EE->setVerifyModules(VerifyModules); @@ -492,7 +556,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { return getPointerToFunction(F); MutexGuard locked(lock); - if (void *P = EEState.getGlobalAddressMap()[GV]) + if (void* P = getPointerToGlobalIfAvailable(GV)) return P; // Global variable might have been added since interpreter started. 
@@ -502,7 +566,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { else llvm_unreachable("Global hasn't had an address allocated yet!"); - return EEState.getGlobalAddressMap()[GV]; + return getPointerToGlobalIfAvailable(GV); } /// \brief Converts a Constant* into a GenericValue, including handling of @@ -1274,25 +1338,3 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { NumInitBytes += (unsigned)GVSize; ++NumGlobals; } - -ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) - : EE(EE), GlobalAddressMap(this) { -} - -sys::Mutex * -ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { - return &EES->EE.lock; -} - -void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES, - const GlobalValue *Old) { - void *OldVal = EES->GlobalAddressMap.lookup(Old); - EES->GlobalAddressReverseMap.erase(OldVal); -} - -void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *, - const GlobalValue *, - const GlobalValue *) { - llvm_unreachable("The ExecutionEngine doesn't know how to handle a" - " RAUW on a value it has a global mapping for."); -} diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index aaa53f0..22ff311 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -351,7 +351,7 @@ class SimpleBindingMemoryManager : public RTDyldMemoryManager { public: SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions, void *Opaque); - virtual ~SimpleBindingMemoryManager(); + ~SimpleBindingMemoryManager() override; uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp index 8ef878c..1ab6203 100644 --- a/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -103,7 +103,7 @@ public: /// Unregisters each object that was previously registered and releases all /// internal resources. 
- virtual ~GDBJITRegistrationListener(); + ~GDBJITRegistrationListener() override; /// Creates an entry in the JIT registry for the buffer @p Object, /// which must contain an object file in executable memory with any diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index aa32452..4135900 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" -#include "EventListenerCommon.h" #include "IntelJITEventsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +28,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using namespace llvm::jitprofiling; using namespace llvm::object; #define DEBUG_TYPE "amplifier-jit-event-listener" @@ -41,7 +39,6 @@ class IntelJITEventListener : public JITEventListener { std::unique_ptr<IntelJITEventsWrapper> Wrapper; MethodIDMap MethodIDs; - FilenameCache Filenames; typedef SmallVector<const void *, 64> MethodAddressVector; typedef DenseMap<const void *, MethodAddressVector> ObjectMap; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 2e8eb16..a26740b 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -316,7 +316,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { #define IMPLEMENT_VECTOR_FCMP(OP) \ case Type::VectorTyID: \ - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ } else { \ IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ @@ -363,7 +363,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, #define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ if (TY->isVectorTy()) { \ - if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + if (cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ } else { \ MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ @@ -536,7 +536,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); Dest.AggregateVal.resize( Src1.AggregateVal.size() ); - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) Dest.AggregateVal[_i].IntVal = APInt(1, ( (Src1.AggregateVal[_i].FloatVal == @@ -567,7 +567,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); Dest.AggregateVal.resize( Src1.AggregateVal.size() ); - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) Dest.AggregateVal[_i].IntVal = APInt(1, ( (Src1.AggregateVal[_i].FloatVal != @@ -713,10 +713,10 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { // Macros to choose appropriate TY: float or double and run operation // execution #define FLOAT_VECTOR_OP(OP) { \ - if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ 
FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ else { \ - if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ else { \ dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ @@ -745,12 +745,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::FMul: FLOAT_VECTOR_OP(*) break; case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; case Instruction::FRem: - if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) R.AggregateVal[i].FloatVal = fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); else { - if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) R.AggregateVal[i].DoubleVal = fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index b022101..e2fe065 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -95,16 +95,15 @@ static ExFunc lookupFunction(const Function *F) { FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); - ExtName += "_" + F->getName().str(); + ExtName += ("_" + F->getName()).str(); sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = (*FuncNames)[ExtName]; if (!FnPtr) - FnPtr = (*FuncNames)["lle_X_" + F->getName().str()]; + FnPtr = (*FuncNames)[("lle_X_" + F->getName()).str()]; if (!FnPtr) // Try calling a generic function... if it exists... - FnPtr = (ExFunc)(intptr_t) - sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + - F->getName().str()); + FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( + ("lle_X_" + F->getName()).str()); if (FnPtr) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 2be9c59..0dc0463 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -108,7 +108,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { public: explicit Interpreter(std::unique_ptr<Module> M); - ~Interpreter(); + ~Interpreter() override; /// runAtExitHandlers - Run any functions registered by the program's calls to /// atexit(3), which we intercept and store in AtExitHandlers. 
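The EngineBuilder and MCJIT::createJIT changes in this patch split the old single RTDyldMemoryManager argument into a memory manager plus a symbol resolver, sharing one object when it can play both roles. A minimal usage sketch against the post-patch API; the helper name buildEngine and the error-string plumbing are illustrative, and native-target initialization is assumed to happen elsewhere:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Module.h"

// SectionMemoryManager derives from RTDyldMemoryManager, which after this
// patch is both an MCJITMemoryManager and a RuntimeDyld::SymbolResolver, so
// setMCJITMemoryManager() can hand one object to both roles, exactly as the
// overload shown in the ExecutionEngine.cpp hunk above does internally.
static llvm::ExecutionEngine *buildEngine(std::unique_ptr<llvm::Module> M,
                                          std::string &Err) {
  llvm::EngineBuilder EB(std::move(M));
  EB.setErrorStr(&Err);
  EB.setMCJITMemoryManager(llvm::make_unique<llvm::SectionMemoryManager>());
  return EB.create(); // Null on failure; Err then describes the reason.
}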
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 20b8553..7e37afe 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/MCJIT.h" @@ -41,26 +42,35 @@ static struct RegisterJIT { extern "C" void LLVMLinkInMCJIT() { } -ExecutionEngine *MCJIT::createJIT(std::unique_ptr<Module> M, - std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MemMgr, - std::unique_ptr<TargetMachine> TM) { +ExecutionEngine* +MCJIT::createJIT(std::unique_ptr<Module> M, + std::string *ErrorStr, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, + std::unique_ptr<TargetMachine> TM) { // Try to register the program as a source of symbols to resolve against. // // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); - std::unique_ptr<RTDyldMemoryManager> MM = std::move(MemMgr); - if (!MM) - MM = std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()); + if (!MemMgr || !Resolver) { + auto RTDyldMM = std::make_shared<SectionMemoryManager>(); + if (!MemMgr) + MemMgr = RTDyldMM; + if (!Resolver) + Resolver = RTDyldMM; + } - return new MCJIT(std::move(M), std::move(TM), std::move(MM)); + return new MCJIT(std::move(M), std::move(TM), std::move(MemMgr), + std::move(Resolver)); } MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm, - std::unique_ptr<RTDyldMemoryManager> MM) + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver) : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr), - MemMgr(this, std::move(MM)), Dyld(&MemMgr), ObjCache(nullptr) { + MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)), + Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) { // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -221,7 +231,7 @@ void MCJIT::finalizeLoadedModules() { Dyld.registerEHFrames(); // Set page permissions. - MemMgr.finalizeMemory(); + MemMgr->finalizeMemory(); } // FIXME: Rename this. @@ -253,11 +263,11 @@ void MCJIT::finalizeModule(Module *M) { finalizeLoadedModules(); } -uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { +RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) { Mangler Mang(TM->getDataLayout()); SmallString<128> FullName; Mang.getNameWithPrefix(FullName, Name); - return Dyld.getSymbol(FullName).getAddress(); + return Dyld.getSymbol(FullName); } Module *MCJIT::findModuleForSymbol(const std::string &Name, @@ -284,14 +294,17 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, } uint64_t MCJIT::getSymbolAddress(const std::string &Name, - bool CheckFunctionsOnly) -{ + bool CheckFunctionsOnly) { + return findSymbol(Name, CheckFunctionsOnly).getAddress(); +} + +RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name, + bool CheckFunctionsOnly) { MutexGuard locked(lock); // First, check to see if we already have this symbol. 
- uint64_t Addr = getExistingSymbolAddress(Name); - if (Addr) - return Addr; + if (auto Sym = findExistingSymbol(Name)) + return Sym; for (object::OwningBinary<object::Archive> &OB : Archives) { object::Archive *A = OB.getBinary(); @@ -310,9 +323,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, // This causes the object file to be loaded. addObjectFile(std::move(OF)); // The address should be here now. - Addr = getExistingSymbolAddress(Name); - if (Addr) - return Addr; + if (auto Sym = findExistingSymbol(Name)) + return Sym; } } } @@ -323,15 +335,18 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, generateCodeForModule(M); // Check the RuntimeDyld table again, it should be there now. - return getExistingSymbolAddress(Name); + return findExistingSymbol(Name); } // If a LazyFunctionCreator is installed, use it to get/create the function. // FIXME: Should we instead have a LazySymbolCreator callback? - if (LazyFunctionCreator) - Addr = (uint64_t)LazyFunctionCreator(Name); + if (LazyFunctionCreator) { + auto Addr = static_cast<uint64_t>( + reinterpret_cast<uintptr_t>(LazyFunctionCreator(Name))); + return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported); + } - return Addr; + return nullptr; } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { @@ -528,7 +543,9 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { - void *ptr = MemMgr.getPointerToNamedFunction(Name, false); + void *ptr = + reinterpret_cast<void*>( + static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress())); if (ptr) return ptr; } @@ -566,7 +583,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj, const RuntimeDyld::LoadedObjectInfo &L) { MutexGuard locked(lock); - MemMgr.notifyObjectLoaded(this, Obj); + MemMgr->notifyObjectLoaded(this, Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj, L); } @@ -578,15 +595,16 @@ void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) { L->NotifyFreeingObject(Obj); } -uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) { - uint64_t Result = ParentEngine->getSymbolAddress(Name, false); +RuntimeDyld::SymbolInfo +LinkingSymbolResolver::findSymbol(const std::string &Name) { + auto Result = ParentEngine.findSymbol(Name, false); // If the symbol wasn't found and it begins with an underscore, try again // without the underscore. if (!Result && Name[0] == '_') - Result = ParentEngine->getSymbolAddress(Name.substr(1), false); + Result = ParentEngine.findSymbol(Name.substr(1), false); if (Result) return Result; - if (ParentEngine->isSymbolSearchingDisabled()) - return 0; - return ClientMM->getSymbolAddress(Name); + if (ParentEngine.isSymbolSearchingDisabled()) + return nullptr; + return ClientResolver->findSymbol(Name); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index de4a8f6..59e9949 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -16,6 +16,7 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Module.h" @@ -26,59 +27,23 @@ class MCJIT; // functions across modules that it owns. 
It aggregates the memory manager // that is passed in to the MCJIT constructor and defers most functionality // to that object. -class LinkingMemoryManager : public RTDyldMemoryManager { +class LinkingSymbolResolver : public RuntimeDyld::SymbolResolver { public: - LinkingMemoryManager(MCJIT *Parent, - std::unique_ptr<RTDyldMemoryManager> MM) - : ParentEngine(Parent), ClientMM(std::move(MM)) {} + LinkingSymbolResolver(MCJIT &Parent, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver) + : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {} - uint64_t getSymbolAddress(const std::string &Name) override; + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override; - // Functions deferred to client memory manager - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName); - } - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override { - return ClientMM->allocateDataSection(Size, Alignment, - SectionID, SectionName, IsReadOnly); - } - - void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, - uintptr_t DataSizeRW) override { - return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); - } - - bool needsToReserveAllocationSpace() override { - return ClientMM->needsToReserveAllocationSpace(); - } - - void notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) override { - ClientMM->notifyObjectLoaded(EE, Obj); - } - - void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - ClientMM->registerEHFrames(Addr, LoadAddr, Size); - } - - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); - } - - bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return ClientMM->finalizeMemory(ErrMsg); + // MCJIT doesn't support logical dylibs. + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return nullptr; } private: - MCJIT *ParentEngine; - std::unique_ptr<RTDyldMemoryManager> ClientMM; + MCJIT &ParentEngine; + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver; }; // About Module states: added->loaded->finalized. 
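For comparison with the LinkingSymbolResolver above, a minimal sketch of a standalone RuntimeDyld::SymbolResolver; the StaticSymbols table is a hypothetical stand-in for whatever the embedder actually resolves against:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include <cstdint>
#include <map>
#include <string>

class TableSymbolResolver : public llvm::RuntimeDyld::SymbolResolver {
public:
  llvm::RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
    // Resolve from a fixed table; a null SymbolInfo reports failure, the
    // same convention LinkingSymbolResolver::findSymbol uses above.
    auto I = StaticSymbols.find(Name);
    if (I == StaticSymbols.end())
      return nullptr;
    return llvm::RuntimeDyld::SymbolInfo(I->second,
                                         llvm::JITSymbolFlags::Exported);
  }

  // No logical-dylib support, matching LinkingSymbolResolver above.
  llvm::RuntimeDyld::SymbolInfo
  findSymbolInLogicalDylib(const std::string &Name) override {
    return nullptr;
  }

private:
  std::map<std::string, uint64_t> StaticSymbols;
};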
@@ -103,7 +68,8 @@ private: class MCJIT : public ExecutionEngine { MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm, - std::unique_ptr<RTDyldMemoryManager> MemMgr); + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver); typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet; @@ -214,7 +180,8 @@ class MCJIT : public ExecutionEngine { std::unique_ptr<TargetMachine> TM; MCContext *Ctx; - LinkingMemoryManager MemMgr; + std::shared_ptr<MCJITMemoryManager> MemMgr; + LinkingSymbolResolver Resolver; RuntimeDyld Dyld; std::vector<JITEventListener*> EventListeners; @@ -238,7 +205,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator E); public: - ~MCJIT(); + ~MCJIT() override; /// @name ExecutionEngine interface implementation /// @{ @@ -324,17 +291,22 @@ public: MCJITCtor = createJIT; } - static ExecutionEngine *createJIT(std::unique_ptr<Module> M, - std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MemMgr, - std::unique_ptr<TargetMachine> TM); + static ExecutionEngine* + createJIT(std::unique_ptr<Module> M, + std::string *ErrorStr, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, + std::unique_ptr<TargetMachine> TM); // @} + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name, + bool CheckFunctionsOnly); + // DEPRECATED - Please use findSymbol instead. // This is not directly exposed via the ExecutionEngine API, but it is // used by the LinkingMemoryManager. uint64_t getSymbolAddress(const std::string &Name, - bool CheckFunctionsOnly); + bool CheckFunctionsOnly); protected: /// emitObject -- Generate a JITed object in memory from the specified module @@ -348,7 +320,7 @@ protected: const RuntimeDyld::LoadedObjectInfo &L); void NotifyFreeingObject(const object::ObjectFile& Obj); - uint64_t getExistingSymbolAddress(const std::string &Name); + RuntimeDyld::SymbolInfo findExistingSymbol(const std::string &Name); Module *findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly); }; diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 9ab4003..23e7662 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" -#include "EventListenerCommon.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" @@ -28,7 +27,6 @@ #include <fcntl.h> using namespace llvm; -using namespace llvm::jitprofiling; using namespace llvm::object; #define DEBUG_TYPE "oprofile-jit-event-listener" diff --git a/lib/ExecutionEngine/Orc/Android.mk b/lib/ExecutionEngine/Orc/Android.mk index 61c1daf..f28f359 100644 --- a/lib/ExecutionEngine/Orc/Android.mk +++ b/lib/ExecutionEngine/Orc/Android.mk @@ -6,6 +6,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ CloneSubModule.cpp \ + ExecutionUtils.cpp \ IndirectionUtils.cpp \ OrcMCJITReplacement.cpp \ OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index b0a8445..b38b459 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMOrcJIT CloneSubModule.cpp + ExecutionUtils.cpp IndirectionUtils.cpp 
OrcMCJITReplacement.cpp OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp new file mode 100644 index 0000000..b7220db --- /dev/null +++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -0,0 +1,102 @@ +//===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" + +namespace llvm { +namespace orc { + +CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End) + : InitList( + GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr), + I((InitList && End) ? InitList->getNumOperands() : 0) { +} + +bool CtorDtorIterator::operator==(const CtorDtorIterator &Other) const { + assert(InitList == Other.InitList && "Incomparable iterators."); + return I == Other.I; +} + +bool CtorDtorIterator::operator!=(const CtorDtorIterator &Other) const { + return !(*this == Other); +} + +CtorDtorIterator& CtorDtorIterator::operator++() { + ++I; + return *this; +} + +CtorDtorIterator CtorDtorIterator::operator++(int) { + CtorDtorIterator Temp = *this; + ++I; + return Temp; +} + +CtorDtorIterator::Element CtorDtorIterator::operator*() const { + ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(I)); + assert(CS && "Unrecognized type in llvm.global_ctors/llvm.global_dtors"); + + Constant *FuncC = CS->getOperand(1); + Function *Func = nullptr; + + // Extract function pointer, pulling off any casts. + while (FuncC) { + if (Function *F = dyn_cast_or_null<Function>(FuncC)) { + Func = F; + break; + } else if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(FuncC)) { + if (CE->isCast()) + FuncC = dyn_cast_or_null<ConstantExpr>(CE->getOperand(0)); + else + break; + } else { + // This isn't anything we recognize. Bail out with Func left set to null. + break; + } + } + + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + Value *Data = CS->getOperand(2); + return Element(Priority->getZExtValue(), Func, Data); +} + +iterator_range<CtorDtorIterator> getConstructors(const Module &M) { + const GlobalVariable *CtorsList = M.getNamedGlobal("llvm.global_ctors"); + return make_range(CtorDtorIterator(CtorsList, false), + CtorDtorIterator(CtorsList, true)); +} + +iterator_range<CtorDtorIterator> getDestructors(const Module &M) { + const GlobalVariable *DtorsList = M.getNamedGlobal("llvm.global_dtors"); + return make_range(CtorDtorIterator(DtorsList, false), + CtorDtorIterator(DtorsList, true)); +} + +void LocalCXXRuntimeOverrides::runDestructors() { + auto& CXXDestructorDataPairs = DSOHandleOverride; + for (auto &P : CXXDestructorDataPairs) + P.first(P.second); + CXXDestructorDataPairs.clear(); +} + +int LocalCXXRuntimeOverrides::CXAAtExitOverride(DestructorPtr Destructor, + void *Arg, void *DSOHandle) { + auto& CXXDestructorDataPairs = + *reinterpret_cast<CXXDestructorDataPairList*>(DSOHandle); + CXXDestructorDataPairs.push_back(std::make_pair(Destructor, Arg)); + return 0; +} + +} // End namespace orc. +} // End namespace llvm. 
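A short sketch of consuming the iterators that the new ExecutionUtils.cpp above defines; the Element field names (Priority, Func) are assumed from the operator*() construction, and the printing itself is purely illustrative:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Walk the llvm.global_ctors entries of a module and report each recognized
// constructor; entries whose function pointer could not be recovered are
// skipped, matching the null-Func convention of operator*() above.
static void dumpModuleCtors(const llvm::Module &M) {
  for (const auto &E : llvm::orc::getConstructors(M)) {
    if (!E.Func)
      continue;
    llvm::outs() << "ctor priority " << E.Priority << ": "
                 << E.Func->getName() << "\n";
  }
}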
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 8cf490f..ebeedef 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -14,17 +14,25 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include <set> +#include <sstream> namespace llvm { namespace orc { -GlobalVariable* createImplPointer(Function &F, const Twine &Name, - Constant *Initializer) { - assert(F.getParent() && "Function isn't in a module."); +Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { + Constant *AddrIntVal = + ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr); + Constant *AddrPtrVal = + ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal, + PointerType::get(&FT, 0)); + return AddrPtrVal; +} + +GlobalVariable* createImplPointer(PointerType &PT, Module &M, + const Twine &Name, Constant *Initializer) { if (!Initializer) - Initializer = Constant::getNullValue(F.getType()); - Module &M = *F.getParent(); - return new GlobalVariable(M, F.getType(), false, GlobalValue::ExternalLinkage, + Initializer = Constant::getNullValue(&PT); + return new GlobalVariable(M, &PT, false, GlobalValue::ExternalLinkage, Initializer, Name, nullptr, GlobalValue::NotThreadLocal, 0, true); } @@ -44,8 +52,41 @@ void makeStub(Function &F, GlobalVariable &ImplPointer) { Builder.CreateRet(Call); } +// Utility class for renaming global values and functions during partitioning. +class GlobalRenamer { +public: + + static bool needsRenaming(const Value &New) { + if (!New.hasName() || New.getName().startswith("\01L")) + return true; + return false; + } + + const std::string& getRename(const Value &Orig) { + // See if we have a name for this global. + { + auto I = Names.find(&Orig); + if (I != Names.end()) + return I->second; + } + + // Nope. Create a new one. + // FIXME: Use a more robust uniquing scheme. (This may blow up if the user + // writes a "__orc_anon[[:digit:]]* method). 
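Backing up to the start of this hunk: createIRTypedAddress, createImplPointer, and makeStub are the indirection primitives the rest of Orc's lazy machinery builds on. A sketch of how they compose under the signatures shown above; buildLazyStub and ResolverAddr are illustrative names, and F is assumed to be a declaration:

// Sketch: give F a body that tail-calls through a JIT-updatable pointer,
// initially aimed at ResolverAddr (e.g. a compile-callback trampoline).
void buildLazyStub(llvm::Function &F, llvm::Module &M,
                   llvm::orc::TargetAddress ResolverAddr) {
  using namespace llvm;
  Constant *Init =
      orc::createIRTypedAddress(*F.getFunctionType(), ResolverAddr);
  GlobalVariable *ImplPtr = orc::createImplPointer(
      *F.getType(), M, F.getName() + "$orc_impl", Init);
  orc::makeStub(F, *ImplPtr); // F now loads ImplPtr and tail-calls it.
}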
+ unsigned ID = Names.size(); + std::ostringstream NameStream; + NameStream << "__orc_anon" << ID++; + auto I = Names.insert(std::make_pair(&Orig, NameStream.str())); + return I.first->second; + } +private: + DenseMap<const Value*, std::string> Names; +}; + void partition(Module &M, const ModulePartitionMap &PMap) { + GlobalRenamer Renamer; + for (auto &KVPair : PMap) { auto ExtractGlobalVars = @@ -54,20 +95,26 @@ void partition(Module &M, const ModulePartitionMap &PMap) { if (KVPair.second.count(&Orig)) { copyGVInitializer(New, Orig, VMap); } - if (New.getLinkage() == GlobalValue::PrivateLinkage) { + if (New.hasLocalLinkage()) { + if (Renamer.needsRenaming(New)) + New.setName(Renamer.getRename(Orig)); New.setLinkage(GlobalValue::ExternalLinkage); New.setVisibility(GlobalValue::HiddenVisibility); } + assert(!Renamer.needsRenaming(New) && "Invalid global name."); }; auto ExtractFunctions = [&](Function &New, const Function &Orig, ValueToValueMapTy &VMap) { if (KVPair.second.count(&Orig)) copyFunctionBody(New, Orig, VMap); - if (New.getLinkage() == GlobalValue::InternalLinkage) { + if (New.hasLocalLinkage()) { + if (Renamer.needsRenaming(New)) + New.setName(Renamer.getRename(Orig)); New.setLinkage(GlobalValue::ExternalLinkage); New.setVisibility(GlobalValue::HiddenVisibility); } + assert(!Renamer.needsRenaming(New) && "Invalid function name."); }; CloneSubModule(*KVPair.first, M, ExtractGlobalVars, ExtractFunctions, diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 00e39bb..4023344 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -26,15 +26,21 @@ namespace orc { class OrcMCJITReplacement : public ExecutionEngine { - class ForwardingRTDyldMM : public RTDyldMemoryManager { + // OrcMCJITReplacement needs to do a little extra book-keeping to ensure that + // Orc's automatic finalization doesn't kick in earlier than MCJIT clients are + // expecting - see finalizeMemory. 
+ class MCJITReplacementMemMgr : public MCJITMemoryManager { public: - ForwardingRTDyldMM(OrcMCJITReplacement &M) : M(M) {} + MCJITReplacementMemMgr(OrcMCJITReplacement &M, + std::shared_ptr<MCJITMemoryManager> ClientMM) + : M(M), ClientMM(std::move(ClientMM)) {} uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { uint8_t *Addr = - M.MM->allocateCodeSection(Size, Alignment, SectionID, SectionName); + ClientMM->allocateCodeSection(Size, Alignment, SectionID, + SectionName); M.SectionsAllocatedSinceLastLoad.insert(Addr); return Addr; } @@ -42,43 +48,35 @@ class OrcMCJITReplacement : public ExecutionEngine { uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, bool IsReadOnly) override { - uint8_t *Addr = M.MM->allocateDataSection(Size, Alignment, SectionID, - SectionName, IsReadOnly); + uint8_t *Addr = ClientMM->allocateDataSection(Size, Alignment, SectionID, + SectionName, IsReadOnly); M.SectionsAllocatedSinceLastLoad.insert(Addr); return Addr; } void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) override { - return M.MM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, + DataSizeRW); } bool needsToReserveAllocationSpace() override { - return M.MM->needsToReserveAllocationSpace(); + return ClientMM->needsToReserveAllocationSpace(); } void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - return M.MM->registerEHFrames(Addr, LoadAddr, Size); + return ClientMM->registerEHFrames(Addr, LoadAddr, Size); } void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - return M.MM->deregisterEHFrames(Addr, LoadAddr, Size); - } - - uint64_t getSymbolAddress(const std::string &Name) override { - return M.getSymbolAddressWithoutMangling(Name); - } - - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) override { - return M.MM->getPointerToNamedFunction(Name, AbortOnFailure); + return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); } void notifyObjectLoaded(ExecutionEngine *EE, const object::ObjectFile &O) override { - return M.MM->notifyObjectLoaded(EE, O); + return ClientMM->notifyObjectLoaded(EE, O); } bool finalizeMemory(std::string *ErrMsg = nullptr) override { @@ -96,21 +94,41 @@ class OrcMCJITReplacement : public ExecutionEngine { // get more than one set of objects loaded but not yet finalized is if // they were loaded during relocation of another set. 
if (M.UnfinalizedSections.size() == 1) - return M.MM->finalizeMemory(ErrMsg); + return ClientMM->finalizeMemory(ErrMsg); return false; } private: OrcMCJITReplacement &M; + std::shared_ptr<MCJITMemoryManager> ClientMM; + }; + + class LinkingResolver : public RuntimeDyld::SymbolResolver { + public: + LinkingResolver(OrcMCJITReplacement &M) : M(M) {} + + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override { + return M.findMangledSymbol(Name); + } + + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return M.ClientResolver->findSymbolInLogicalDylib(Name); + } + + private: + OrcMCJITReplacement &M; }; private: static ExecutionEngine * createOrcMCJITReplacement(std::string *ErrorMsg, - std::unique_ptr<RTDyldMemoryManager> OrcJMM, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) { - return new OrcMCJITReplacement(std::move(OrcJMM), std::move(TM)); + return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver), + std::move(TM)); } public: @@ -118,12 +136,15 @@ public: OrcMCJITReplacementCtor = createOrcMCJITReplacement; } - OrcMCJITReplacement(std::unique_ptr<RTDyldMemoryManager> MM, - std::unique_ptr<TargetMachine> TM) - : TM(std::move(TM)), MM(std::move(MM)), Mang(this->TM->getDataLayout()), + OrcMCJITReplacement( + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver, + std::unique_ptr<TargetMachine> TM) + : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)), + Resolver(*this), ClientResolver(std::move(ClientResolver)), + Mang(this->TM->getDataLayout()), NotifyObjectLoaded(*this), NotifyFinalized(*this), - ObjectLayer(ObjectLayerT::CreateRTDyldMMFtor(), NotifyObjectLoaded, - NotifyFinalized), + ObjectLayer(NotifyObjectLoaded, NotifyFinalized), CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)), LazyEmitLayer(CompileLayer) { setDataLayout(this->TM->getDataLayout()); @@ -139,15 +160,13 @@ public: Modules.push_back(std::move(M)); std::vector<Module *> Ms; Ms.push_back(&*Modules.back()); - LazyEmitLayer.addModuleSet(std::move(Ms), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver); } void addObjectFile(std::unique_ptr<object::ObjectFile> O) override { std::vector<std::unique_ptr<object::ObjectFile>> Objs; Objs.push_back(std::move(O)); - ObjectLayer.addObjectSet(std::move(Objs), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); } void addObjectFile(object::OwningBinary<object::ObjectFile> O) override { @@ -157,8 +176,7 @@ public: std::vector<std::unique_ptr<object::ObjectFile>> Objs; Objs.push_back(std::move(Obj)); auto H = - ObjectLayer.addObjectSet(std::move(Objs), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); std::vector<std::unique_ptr<MemoryBuffer>> Bufs; Bufs.push_back(std::move(Buf)); @@ -170,7 +188,11 @@ public: } uint64_t getSymbolAddress(StringRef Name) { - return getSymbolAddressWithoutMangling(Mangle(Name)); + return findSymbol(Name).getAddress(); + } + + RuntimeDyld::SymbolInfo findSymbol(StringRef Name) { + return findMangledSymbol(Mangle(Name)); } void finalizeObject() override { @@ -214,18 +236,19 @@ public: } private: - uint64_t getSymbolAddressWithoutMangling(StringRef Name) { - if (uint64_t Addr = LazyEmitLayer.findSymbol(Name, false).getAddress()) - return Addr; - if (uint64_t Addr 
= MM->getSymbolAddress(Name)) - return Addr; - if (uint64_t Addr = scanArchives(Name)) - return Addr; - return 0; + RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) { + if (auto Sym = LazyEmitLayer.findSymbol(Name, false)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + if (auto Sym = ClientResolver->findSymbol(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + if (auto Sym = scanArchives(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + + return nullptr; } - uint64_t scanArchives(StringRef Name) { + JITSymbol scanArchives(StringRef Name) { for (object::OwningBinary<object::Archive> &OB : Archives) { object::Archive *A = OB.getBinary(); // Look for our symbols in each Archive @@ -241,14 +264,13 @@ private: std::vector<std::unique_ptr<object::ObjectFile>> ObjSet; ObjSet.push_back(std::unique_ptr<object::ObjectFile>( static_cast<object::ObjectFile *>(ChildBin.release()))); - ObjectLayer.addObjectSet( - std::move(ObjSet), llvm::make_unique<ForwardingRTDyldMM>(*this)); - if (uint64_t Addr = ObjectLayer.findSymbol(Name, true).getAddress()) - return Addr; + ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver); + if (auto Sym = ObjectLayer.findSymbol(Name, true)) + return Sym; } } } - return 0; + return nullptr; } class NotifyObjectLoadedT { @@ -267,7 +289,7 @@ private: assert(Objects.size() == Infos.size() && "Incorrect number of Infos for Objects."); for (unsigned I = 0; I < Objects.size(); ++I) - M.MM->notifyObjectLoaded(&M, *Objects[I]); + M.MemMgr.notifyObjectLoaded(&M, *Objects[I]); }; private: @@ -299,7 +321,9 @@ private: typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT; std::unique_ptr<TargetMachine> TM; - std::unique_ptr<RTDyldMemoryManager> MM; + MCJITReplacementMemMgr MemMgr; + LinkingResolver Resolver; + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver; Mangler Mang; NotifyObjectLoadedT NotifyObjectLoaded; diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp index 6fe5301..fc56e67 100644 --- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp @@ -6,39 +6,6 @@ using namespace llvm::orc; namespace { -std::array<const char *, 12> X86GPRsToSave = {{ - "rbp", "rbx", "r12", "r13", "r14", "r15", // Callee saved. - "rdi", "rsi", "rdx", "rcx", "r8", "r9", // Int args. 
-}}; - -std::array<const char *, 8> X86XMMsToSave = {{ - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" // FP args -}}; - -template <typename OStream> unsigned saveX86Regs(OStream &OS) { - for (const auto &GPR : X86GPRsToSave) - OS << " pushq %" << GPR << "\n"; - - OS << " subq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n"; - - for (unsigned i = 0; i < X86XMMsToSave.size(); ++i) - OS << " movdqu %" << X86XMMsToSave[i] << ", " - << (16 * (X86XMMsToSave.size() - i - 1)) << "(%rsp)\n"; - - return (8 * X86GPRsToSave.size()) + (16 * X86XMMsToSave.size()); -} - -template <typename OStream> void restoreX86Regs(OStream &OS) { - for (unsigned i = 0; i < X86XMMsToSave.size(); ++i) - OS << " movdqu " << (16 * i) << "(%rsp), %" - << X86XMMsToSave[(X86XMMsToSave.size() - i - 1)] << "\n"; - OS << " addq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n"; - - for (unsigned i = 0; i < X86GPRsToSave.size(); ++i) - OS << " popq %" << X86GPRsToSave[X86GPRsToSave.size() - i - 1] << "\n"; -} - -template <typename TargetT> uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, TargetAddress CallbackID) { return JCBM->executeCompileCallback(CallbackID); @@ -53,14 +20,28 @@ const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; void OrcX86_64::insertResolverBlock( Module &M, JITCompileCallbackManagerBase &JCBM) { + + // Trampoline code-sequence length, used to get trampoline address from return + // address. const unsigned X86_64_TrampolineLength = 6; - auto CallbackPtr = executeCompileCallback<OrcX86_64>; + + // List of x86-64 GPRs to save. Note - RBP saved separately below. + std::array<const char *, 14> GPRs = {{ + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", + "r14", "r15" + }}; + + // Address of the executeCompileCallback function. uint64_t CallbackAddr = - static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr)); + static_cast<uint64_t>( + reinterpret_cast<uintptr_t>(executeCompileCallback)); std::ostringstream AsmStream; Triple TT(M.getTargetTriple()); + // Switch to text section. if (TT.getOS() == Triple::Darwin) AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" << ".align 4, 0x90\n"; @@ -68,24 +49,51 @@ void OrcX86_64::insertResolverBlock( AsmStream << ".text\n" << ".align 16, 0x90\n"; + // Bake in a pointer to the callback manager immediately before the + // start of the resolver function. AsmStream << "jit_callback_manager_addr:\n" - << " .quad " << &JCBM << "\n" - << ResolverBlockName << ":\n"; - - uint64_t ReturnAddrOffset = saveX86Regs(AsmStream); - - // Compute index, load object address, and call JIT. - AsmStream << " leaq jit_callback_manager_addr(%rip), %rdi\n" + << " .quad " << &JCBM << "\n"; + + // Start the resolver function. + AsmStream << ResolverBlockName << ":\n" + << " pushq %rbp\n" + << " movq %rsp, %rbp\n"; + + // Store the GPRs. + for (const auto &GPR : GPRs) + AsmStream << " pushq %" << GPR << "\n"; + + // Store floating-point state with FXSAVE. + // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd + // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add + // an extra 64 bits of padding to the FXSave area. + unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0; + unsigned FXSaveSize = 512 + Padding; + AsmStream << " subq $" << FXSaveSize << ", %rsp\n" + << " fxsave (%rsp)\n" + + // Load callback manager address, compute trampoline address, call JIT. 
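+  // (The lines below rely on the frame set up above: after "pushq %rbp;
+  //  movq %rsp, %rbp", the trampoline's return address sits at 0x8(%rbp).
+  //  Subtracting X86_64_TrampolineLength converts it into the trampoline's
+  //  own address, which is the CallbackID handed to executeCompileCallback
+  //  in %rsi.)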
+ << " lea jit_callback_manager_addr(%rip), %rdi\n" << " movq (%rdi), %rdi\n" - << " movq " << ReturnAddrOffset << "(%rsp), %rsi\n" + << " movq 0x8(%rbp), %rsi\n" << " subq $" << X86_64_TrampolineLength << ", %rsi\n" << " movabsq $" << CallbackAddr << ", %rax\n" << " callq *%rax\n" - << " movq %rax, " << ReturnAddrOffset << "(%rsp)\n"; - restoreX86Regs(AsmStream); + // Replace the return to the trampoline with the return address of the + // compiled function body. + << " movq %rax, 0x8(%rbp)\n" + + // Restore the floating point state. + << " fxrstor (%rsp)\n" + << " addq $" << FXSaveSize << ", %rsp\n"; + + for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend())) + AsmStream << " popq %" << GPR << "\n"; - AsmStream << " retq\n"; + // Restore original RBP and return to compiled function body. + AsmStream << " popq %rbp\n" + << " retq\n"; M.appendModuleInlineAsm(AsmStream.str()); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a0ed7cf..a13ecb7 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -57,7 +57,8 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { unsigned BytesRemaining = S.Size; if (StartPadding) { - dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":"; + dbgs() << "\n" << format("0x%016" PRIx64, + LoadAddr & ~(uint64_t)(ColsPerRow - 1)) << ":"; while (StartPadding--) dbgs() << " "; } @@ -92,7 +93,7 @@ void RuntimeDyldImpl::resolveRelocations() { // entry provides the section to which the relocation will be applied. uint64_t Addr = Sections[i].LoadAddress; DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t" - << format("0x%x", Addr) << "\n"); + << format("%p", (uintptr_t)Addr) << "\n"); DEBUG(dumpSectionMemory(Sections[i], "before relocations")); resolveRelocationList(Relocations[i], Addr); DEBUG(dumpSectionMemory(Sections[i], "after relocations")); @@ -151,10 +152,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Compute the memory size required to load all sections to be loaded // and pass this information to the memory manager - if (MemMgr->needsToReserveAllocationSpace()) { + if (MemMgr.needsToReserveAllocationSpace()) { uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0; computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW); - MemMgr->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); } // Used sections from the object file @@ -360,19 +361,20 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, if (Name == ".eh_frame") SectionSize += 4; - if (SectionSize > 0) { - // save the total size of the section - if (IsCode) { - CodeSectionSizes.push_back(SectionSize); - } else if (IsReadOnly) { - ROSectionSizes.push_back(SectionSize); - } else { - RWSectionSizes.push_back(SectionSize); - } - // update the max alignment - if (Alignment > MaxAlignment) { - MaxAlignment = Alignment; - } + if (!SectionSize) + SectionSize = 1; + + if (IsCode) { + CodeSectionSizes.push_back(SectionSize); + } else if (IsReadOnly) { + ROSectionSizes.push_back(SectionSize); + } else { + RWSectionSizes.push_back(SectionSize); + } + + // update the max alignment + if (Alignment > MaxAlignment) { + MaxAlignment = Alignment; } } } @@ -485,7 +487,7 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Skip common symbols already elsewhere. 
if (GlobalSymbolTable.count(Name) || - MemMgr->getSymbolAddressInLogicalDylib(Name)) { + Resolver.findSymbolInLogicalDylib(Name)) { DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name << "'\n"); continue; @@ -502,8 +504,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Allocate memory for the section unsigned SectionID = Sections.size(); - uint8_t *Addr = MemMgr->allocateDataSection(CommonSize, sizeof(void *), - SectionID, StringRef(), false); + uint8_t *Addr = MemMgr.allocateDataSection(CommonSize, sizeof(void *), + SectionID, StringRef(), false); if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; @@ -577,10 +579,12 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, if (IsRequired) { Check(Section.getContents(data)); Allocate = DataSize + PaddingSize + StubBufSize; - Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID, - Name) - : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, - Name, IsReadOnly); + if (!Allocate) + Allocate = 1; + Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID, + Name) + : MemMgr.allocateDataSection(Allocate, Alignment, SectionID, + Name, IsReadOnly); if (!Addr) report_fatal_error("Unable to allocate section memory!"); @@ -787,9 +791,9 @@ void RuntimeDyldImpl::resolveExternalSymbols() { uint64_t Addr = 0; RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(Name); if (Loc == GlobalSymbolTable.end()) { - // This is an external symbol, try to get its address from - // MemoryManager. - Addr = MemMgr->getSymbolAddress(Name.data()); + // This is an external symbol, try to get its address from the symbol + // resolver. + Addr = Resolver.findSymbol(Name.data()).getAddress(); // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. Consequently, we need to update our @@ -810,7 +814,6 @@ void RuntimeDyldImpl::resolveExternalSymbols() { report_fatal_error("Program used external function '" + Name + "' which could not be resolved!"); - updateGOTEntries(Name, Addr); DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" << format("0x%lx", Addr) << "\n"); // This list may have been updated when we called getSymbolAddress, so @@ -835,7 +838,12 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( return 0; } -RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { +void RuntimeDyld::MemoryManager::anchor() {} +void RuntimeDyld::SymbolResolver::anchor() {} + +RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : MemMgr(MemMgr), Resolver(Resolver) { // FIXME: There's a potential issue lurking here if a single instance of // RuntimeDyld is used to load multiple objects. The current implementation // associates a single memory manager with a RuntimeDyld instance. Even @@ -843,7 +851,6 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { // they share a single memory manager. This can become a problem when page // permissions are applied. 
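The constructor hunk above is the pivot of this patch: RuntimeDyld now borrows a RuntimeDyld::MemoryManager and a RuntimeDyld::SymbolResolver instead of owning an RTDyldMemoryManager. Client code then takes roughly this shape; the sketch assumes RTDyldMemoryManager (and hence SectionMemoryManager) is reparented onto both new interfaces, as the RTDyldMemoryManager.h include added to RuntimeDyldImpl.h below suggests:

// Sketch: link one in-memory object via the two-reference interface.
void linkObject(const llvm::object::ObjectFile &Obj) {
  llvm::SectionMemoryManager MM; // memory manager *and* symbol resolver
  llvm::RuntimeDyld Dyld(MM, MM);
  Dyld.loadObject(Obj);
  Dyld.resolveRelocations();
  std::string Err;
  MM.finalizeMemory(&Err); // apply final page permissions
}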
Dyld = nullptr; - MM = mm; ProcessAllSections = false; Checker = nullptr; } @@ -851,27 +858,33 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { RuntimeDyld::~RuntimeDyld() {} static std::unique_ptr<RuntimeDyldCOFF> -createRuntimeDyldCOFF(Triple::ArchType Arch, RTDyldMemoryManager *MM, +createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldCOFF> Dyld(RuntimeDyldCOFF::create(Arch, MM)); + std::unique_ptr<RuntimeDyldCOFF> Dyld = + RuntimeDyldCOFF::create(Arch, MM, Resolver); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; } static std::unique_ptr<RuntimeDyldELF> -createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections, - RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM)); +createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { + std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM, Resolver)); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; } static std::unique_ptr<RuntimeDyldMachO> -createRuntimeDyldMachO(Triple::ArchType Arch, RTDyldMemoryManager *MM, - bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldMachO> Dyld(RuntimeDyldMachO::create(Arch, MM)); +createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections, + RuntimeDyldCheckerImpl *Checker) { + std::unique_ptr<RuntimeDyldMachO> Dyld = + RuntimeDyldMachO::create(Arch, MM, Resolver); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; @@ -881,14 +894,14 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> RuntimeDyld::loadObject(const ObjectFile &Obj) { if (!Dyld) { if (Obj.isELF()) - Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker); + Dyld = createRuntimeDyldELF(MemMgr, Resolver, ProcessAllSections, Checker); else if (Obj.isMachO()) Dyld = createRuntimeDyldMachO( - static_cast<Triple::ArchType>(Obj.getArch()), MM, + static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, ProcessAllSections, Checker); else if (Obj.isCOFF()) Dyld = createRuntimeDyldCOFF( - static_cast<Triple::ArchType>(Obj.getArch()), MM, + static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, ProcessAllSections, Checker); else report_fatal_error("Incompatible object format!"); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 56bcb8e..8055d55 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -40,13 +40,15 @@ public: namespace llvm { std::unique_ptr<RuntimeDyldCOFF> -llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) { +llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) { switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); break; case Triple::x86_64: - return make_unique<RuntimeDyldCOFFX86_64>(MM); + return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h 
b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h index 681a3e5..32b8fa2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h @@ -31,11 +31,15 @@ public: std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &Obj) override; bool isCompatibleFile(const object::ObjectFile &Obj) const override; - static std::unique_ptr<RuntimeDyldCOFF> create(Triple::ArchType Arch, - RTDyldMemoryManager *MM); + + static std::unique_ptr<RuntimeDyldCOFF> + create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); protected: - RuntimeDyldCOFF(RTDyldMemoryManager *MM) : RuntimeDyldImpl(MM) {} + RuntimeDyldCOFF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver) {} uint64_t getSymbolOffset(const SymbolRef &Sym); }; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index c991408..957571b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -738,7 +738,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return getRTDyld().MemMgr->getSymbolAddress(Symbol); + return getRTDyld().Resolver.findSymbol(Symbol).getAddress(); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h index e8d299a..69d2a7d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h @@ -19,6 +19,7 @@ class RuntimeDyldCheckerImpl { friend class RuntimeDyldChecker; friend class RuntimeDyldImpl; friend class RuntimeDyldCheckerExprEval; + friend class RuntimeDyldELF; public: RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 6278170..bbffdfb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldELF.h" +#include "RuntimeDyldCheckerImpl.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -183,32 +184,30 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const { namespace llvm { -RuntimeDyldELF::RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} +RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {} RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); 
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } UnregisteredEHFrameSections.clear(); } void RuntimeDyldELF::deregisterEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = RegisteredEHFrameSections[i]; uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } RegisteredEHFrameSections.clear(); } @@ -247,27 +246,16 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, << format("%p\n", Section.Address + Offset)); break; } - case ELF::R_X86_64_GOTPCREL: { - // findGOTEntry returns the 'G + GOT' part of the relocation calculation - // based on the load/target address of the GOT (not the current/local addr). - uint64_t GOTAddr = findGOTEntry(Value, SymOffset); - uint64_t FinalAddress = Section.LoadAddress + Offset; - // The processRelocationRef method combines the symbol offset and the addend - // and in most cases that's what we want. For this relocation type, we need - // the raw addend, so we subtract the symbol offset to get it. - int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress; - assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); - int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; - break; - } case ELF::R_X86_64_PC32: { // Get the placeholder value from the generated object since // a previous relocation attempt may have overwritten the loaded version support::ulittle32_t::ref Placeholder( (void *)(Section.ObjAddress + Offset)); uint64_t FinalAddress = Section.LoadAddress + Offset; - int64_t RealOffset = Placeholder + Value + Addend - FinalAddress; + int64_t RealOffset = Value + Addend - FinalAddress; + // Don't add the placeholder if this is a stub + if (Offset < Section.Size) + RealOffset += Placeholder; assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; @@ -279,8 +267,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, support::ulittle64_t::ref Placeholder( (void *)(Section.ObjAddress + Offset)); uint64_t FinalAddress = Section.LoadAddress + Offset; - support::ulittle64_t::ref(Section.Address + Offset) = - Placeholder + Value + Addend - FinalAddress; + int64_t RealOffset = Value + Addend - FinalAddress; + if (Offset < Section.Size) + RealOffset += Placeholder; + support::ulittle64_t::ref(Section.Address + Offset) = RealOffset; break; } } @@ -1325,16 +1315,18 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); - // Create a GOT entry for the external function. - GOTEntries.push_back(Value); - - // Make our stub function a relative call to the GOT entry. 
- RelocationEntry RE(SectionID, StubOffset + 2, ELF::R_X86_64_GOTPCREL, - -4); - addRelocationForSymbol(RE, Value.SymbolName); - // Bump our stub offset counter Section.StubOffset = StubOffset + getMaxStubSize(); + + // Allocate a GOT Entry + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + + // The load of the GOT address has an addend of -4 + resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); + + // Fill in the value of the symbol we're targeting into the GOT + addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64), + Value.SymbolName); } // Make the target call a call into the stub table. @@ -1345,10 +1337,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Value.Offset); addRelocationForSection(RE, Value.SectionID); } + } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) { + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend); + + // Fill in the value of the symbol we're targeting into the GOT + RelocationEntry RE = computeGOTOffsetRE(SectionID, GOTOffset, Value.Offset, ELF::R_X86_64_64); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); } else { - if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) { - GOTEntries.push_back(Value); - } RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); @@ -1358,22 +1357,6 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( return ++RelI; } -void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) { - - SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator it; - SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator end = GOTs.end(); - - for (it = GOTs.begin(); it != end; ++it) { - GOTRelocations &GOTEntries = it->second; - for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (GOTEntries[i].SymbolName != nullptr && - GOTEntries[i].SymbolName == Name) { - GOTEntries[i].Offset = Addr; - } - } - } -} - size_t RuntimeDyldELF::getGOTEntrySize() { // We don't use the GOT in all of these cases, but it's essentially free // to put them all here. @@ -1400,83 +1383,53 @@ size_t RuntimeDyldELF::getGOTEntrySize() { return Result; } -uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) { - - const size_t GOTEntrySize = getGOTEntrySize(); - - SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator it; - SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator end = - GOTs.end(); - - int GOTIndex = -1; - for (it = GOTs.begin(); it != end; ++it) { - SID GOTSectionID = it->first; - const GOTRelocations &GOTEntries = it->second; - - // Find the matching entry in our vector. - uint64_t SymbolOffset = 0; - for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (!GOTEntries[i].SymbolName) { - if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress && - GOTEntries[i].Offset == Offset) { - GOTIndex = i; - SymbolOffset = GOTEntries[i].Offset; - break; - } - } else { - // GOT entries for external symbols use the addend as the address when - // the external symbol has been resolved. - if (GOTEntries[i].Offset == LoadAddress) { - GOTIndex = i; - // Don't use the Addend here. The relocation handler will use it. 
- break; - } - } - } - - if (GOTIndex != -1) { - if (GOTEntrySize == sizeof(uint64_t)) { - uint64_t *LocalGOTAddr = (uint64_t *)getSectionAddress(GOTSectionID); - // Fill in this entry with the address of the symbol being referenced. - LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset; - } else { - uint32_t *LocalGOTAddr = (uint32_t *)getSectionAddress(GOTSectionID); - // Fill in this entry with the address of the symbol being referenced. - LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset); - } - - // Calculate the load address of this entry - return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize); - } +uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no) +{ + (void)SectionID; // The GOT Section is the same for all sections in the object file + if (GOTSectionID == 0) { + GOTSectionID = Sections.size(); + // Reserve a section id. We'll allocate the section later + // once we know the total size + Sections.push_back(SectionEntry(".got", 0, 0, 0)); } + uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize(); + CurrentGOTIndex += no; + return StartOffset; +} - assert(GOTIndex != -1 && "Unable to find requested GOT entry."); - return 0; +void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, uint64_t GOTOffset) +{ + // Fill in the relative address of the GOT Entry into the stub + RelocationEntry GOTRE(SectionID, Offset, ELF::R_X86_64_PC32, GOTOffset); + addRelocationForSection(GOTRE, GOTSectionID); +} + +RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t GOTOffset, uint64_t SymbolOffset, + uint32_t Type) +{ + (void)SectionID; // The GOT Section is the same for all sections in the object file + return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { // If necessary, allocate the global offset table - if (MemMgr) { - // Allocate the GOT if necessary - size_t numGOTEntries = GOTEntries.size(); - if (numGOTEntries != 0) { - // Allocate memory for the section - unsigned SectionID = Sections.size(); - size_t TotalSize = numGOTEntries * getGOTEntrySize(); - uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(), - SectionID, ".got", false); - if (!Addr) - report_fatal_error("Unable to allocate memory for GOT!"); - - GOTs.push_back(std::make_pair(SectionID, GOTEntries)); - Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0)); - // For now, initialize all GOT entries to zero. We'll fill them in as - // needed when GOT-based relocations are applied. - memset(Addr, 0, TotalSize); - } - } else { - report_fatal_error("Unable to allocate memory for GOT!"); + if (GOTSectionID != 0) { + // Allocate memory for the section + size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); + uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), + GOTSectionID, ".got", false); + if (!Addr) + report_fatal_error("Unable to allocate memory for GOT!"); + + Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0); + + if (Checker) + Checker->registerSection(Obj.getFileName(), GOTSectionID); + + // For now, initialize all GOT entries to zero. We'll fill them in as + // needed when GOT-based relocations are applied. + memset(Addr, 0, TotalSize); } // Look for and record the EH frame section. 
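Pulled together, the rewritten GOT support is a three-step protocol per GOT-using reference; this is a paraphrase of the hunks above, not new API:

// 1. Reserve a slot; the shared .got section is created lazily on first use.
uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
// 2. Patch the referencing site with a PC32 relocation against the slot.
resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend);
// 3. Queue an R_X86_64_64 into the slot itself, so the target's final
//    address lands there during symbol resolution; finalizeLoad() later
//    allocates all CurrentGOTIndex slots in one shot and zero-fills them.
addRelocationForSymbol(
    computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64),
    Value.SymbolName);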
@@ -1490,6 +1443,9 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, break; } } + + GOTSectionID = 0; + CurrentGOTIndex = 0; } bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 71260d0..590d26a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -80,16 +80,32 @@ class RuntimeDyldELF : public RuntimeDyldImpl { ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel); - uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset); size_t getGOTEntrySize(); - void updateGOTEntries(StringRef Name, uint64_t Addr) override; + SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; } - // Relocation entries for symbols whose position-independent offset is - // updated in a global offset table. - typedef SmallVector<RelocationValueRef, 2> GOTRelocations; - GOTRelocations GOTEntries; // List of entries requiring finalization. - SmallVector<std::pair<SID, GOTRelocations>, 8> GOTs; // Allocated tables. + // Allocate 'no' GOT entries for use in the given section. + uint64_t allocateGOTEntries(unsigned SectionID, unsigned no); + + // Resolve the relative address of GOTOffset in Section ID and place + // it at the given Offset. + void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, + uint64_t GOTOffset); + + // For a GOT entry referenced from SectionID, compute a relocation entry + // that will place the final resolved value in the GOT slot + RelocationEntry computeGOTOffsetRE(unsigned SectionID, + uint64_t GOTOffset, + uint64_t SymbolOffset, + unsigned Type); + + // The tentative ID for the GOT section + unsigned GOTSectionID; + + // Records the current number of allocated slots in the GOT + // (This would be equivalent to GOTEntries.size() were it not for relocations + // that consume more than one slot) + unsigned CurrentGOTIndex; // When a module is loaded we save the SectionID of the EH frame section // in a table until we receive a request to register all unregistered @@ -98,8 +114,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { SmallVector<SID, 2> RegisteredEHFrameSections; public: - RuntimeDyldELF(RTDyldMemoryManager *mm); - virtual ~RuntimeDyldELF(); + RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); + ~RuntimeDyldELF() override; std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &O) override; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 05060dd..ee51a75 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/Object/ObjectFile.h" @@ -51,7 +52,7 @@ class Twine; class SectionEntry { public: /// Name - section name. - StringRef Name; + std::string Name; /// Address - address in the linker's memory where the section resides. uint8_t *Address; @@ -188,7 +189,10 @@ class RuntimeDyldImpl { friend class RuntimeDyldCheckerImpl; protected: // The MemoryManager to load objects into. 
- RTDyldMemoryManager *MemMgr; + RuntimeDyld::MemoryManager &MemMgr; + + // The symbol resolver to use for external symbols. + RuntimeDyld::SymbolResolver &Resolver; // Attached RuntimeDyldChecker instance. Null if no instance attached. RuntimeDyldCheckerImpl *Checker; @@ -357,10 +361,6 @@ protected: /// \brief Resolve relocations to external symbols. void resolveExternalSymbols(); - /// \brief Update GOT entries for external symbols. - // The base class does nothing. ELF overrides this. - virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {} - // \brief Compute an upper bound of the memory that is required to load all // sections void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, @@ -374,8 +374,10 @@ protected: std::pair<unsigned, unsigned> loadObjectImpl(const object::ObjectFile &Obj); public: - RuntimeDyldImpl(RTDyldMemoryManager *mm) - : MemMgr(mm), Checker(nullptr), ProcessAllSections(false), HasError(false) { + RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr), + ProcessAllSections(false), HasError(false) { } virtual ~RuntimeDyldImpl(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 2d39662..675063c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -178,25 +178,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const { } template <typename Impl> -void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg, +void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID; unsigned TextSID = RTDYLD_INVALID_SECTION_ID; unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID; - ObjSectionToIDMap::iterator i, e; - for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { - const SectionRef &Section = i->first; + for (const auto &Section : Obj.sections()) { StringRef Name; Section.getName(Name); - if (Name == "__eh_frame") - EHFrameSID = i->second; - else if (Name == "__text") - TextSID = i->second; + + // Force emission of the __text, __eh_frame, and __gcc_except_tab sections + // if they're present. Otherwise call down to the impl to handle other + // sections that have already been emitted. 
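+    // (The findOrEmitSection calls below are idempotent: if loading already
+    //  emitted one of these sections, the existing SectionID is returned, so
+    //  forcing them here only emits sections that were previously skipped.)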
+ if (Name == "__text") + TextSID = findOrEmitSection(Obj, Section, true, SectionMap); + else if (Name == "__eh_frame") + EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap); else if (Name == "__gcc_except_tab") - ExceptTabSID = i->second; - else - impl().finalizeSection(ObjImg, i->second, Section); + ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap); + else { + auto I = SectionMap.find(Section); + if (I != SectionMap.end()) + impl().finalizeSection(Obj, I->second, Section); + } } UnregisteredEHFrameSections.push_back( EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID)); @@ -239,7 +244,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = A->ObjAddress - B->ObjAddress; + int64_t ObjDistance = + static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress); int64_t MemDistance = A->LoadAddress - B->LoadAddress; return ObjDistance - MemDistance; } @@ -247,8 +253,6 @@ static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { template <typename Impl> void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i]; if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID || @@ -271,22 +275,28 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() { P = processFDE(P, DeltaForText, DeltaForEH); } while (P != End); - MemMgr->registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, - EHFrame->Size); + MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, + EHFrame->Size); } UnregisteredEHFrameSections.clear(); } std::unique_ptr<RuntimeDyldMachO> -RuntimeDyldMachO::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) { +RuntimeDyldMachO::create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) { switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldMachO."); break; - case Triple::arm: return make_unique<RuntimeDyldMachOARM>(MM); - case Triple::aarch64: return make_unique<RuntimeDyldMachOAArch64>(MM); - case Triple::x86: return make_unique<RuntimeDyldMachOI386>(MM); - case Triple::x86_64: return make_unique<RuntimeDyldMachOX86_64>(MM); + case Triple::arm: + return make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver); + case Triple::aarch64: + return make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver); + case Triple::x86: + return make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver); + case Triple::x86_64: + return make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index f8bfc03..45a94ba 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -49,7 +49,9 @@ protected: // EH frame sections with the memory manager. SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections; - RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + RuntimeDyldMachO(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver) {} /// This convenience method uses memcpy to extract a contiguous addend (the /// addend size and offset are taken from the corresponding fields of the RE). 
@@ -114,8 +116,10 @@ protected: public: /// Create a RuntimeDyldMachO instance for the given target architecture. - static std::unique_ptr<RuntimeDyldMachO> create(Triple::ArchType Arch, - RTDyldMemoryManager *mm); + static std::unique_ptr<RuntimeDyldMachO> + create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &O) override; @@ -142,7 +146,9 @@ private: int64_t DeltaForEH); public: - RuntimeDyldMachOCRTPBase(RTDyldMemoryManager *mm) : RuntimeDyldMachO(mm) {} + RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachO(MemMgr, Resolver) {} void finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) override; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index ce2f4a2..cd534a1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -32,7 +32,9 @@ private: SmallVector<SID, 2> RegisteredEHFrameSections; public: - RuntimeDyldCOFFX86_64(RTDyldMemoryManager *MM) : RuntimeDyldCOFF(MM) {} + RuntimeDyldCOFFX86_64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldCOFF(MM, Resolver) {} unsigned getMaxStubSize() override { return 6; // 2-byte jmp instruction + 32-bit relative address @@ -177,13 +179,11 @@ public: unsigned getStubAlignment() override { return 1; } void registerEHFrames() override { - if (!MemMgr) - return; for (auto const &EHFrameSID : UnregisteredEHFrameSections) { uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } UnregisteredEHFrameSections.clear(); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 196fa62..99fd6e3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -23,8 +23,9 @@ public: typedef uint64_t TargetPtrT; - RuntimeDyldMachOAArch64(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOAArch64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 09e430e..09e51f2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -25,7 +25,9 @@ public: typedef uint32_t TargetPtrT; - RuntimeDyldMachOARM(RTDyldMemoryManager *MM) : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOARM(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h 
index 67d7027..053f90c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -22,8 +22,9 @@ public: typedef uint32_t TargetPtrT; - RuntimeDyldMachOI386(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOI386(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index 0734017..4b3b01b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -22,8 +22,9 @@ public: typedef uint64_t TargetPtrT; - RuntimeDyldMachOX86_64(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOX86_64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 81e51d1..bfd87ec 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -1,8 +1,10 @@ -# Disable the coverage instrumentation for the fuzzer itself. -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fsanitize-coverage=0") -if( LLVM_USE_SANITIZE_COVERAGE ) +set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS_RELEASE}") +# Disable the coverage and sanitizer instrumentation for the fuzzer itself. +set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O2 -fno-sanitize=all") +if( LLVM_USE_SANITIZE_COVERAGE ) add_library(LLVMFuzzerNoMain OBJECT FuzzerCrossOver.cpp + FuzzerDFSan.cpp FuzzerDriver.cpp FuzzerIO.cpp FuzzerLoop.cpp diff --git a/lib/Fuzzer/FuzzerDFSan.cpp b/lib/Fuzzer/FuzzerDFSan.cpp new file mode 100644 index 0000000..16f8c0f --- /dev/null +++ b/lib/Fuzzer/FuzzerDFSan.cpp @@ -0,0 +1,275 @@ +//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// DataFlowSanitizer (DFSan) is a tool for +// generalised dynamic data flow (taint) analysis: +// http://clang.llvm.org/docs/DataFlowSanitizer.html . +// +// This file implements a mutation algorithm based on taint +// analysis feedback from DFSan. +// +// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing" +// by Vijay Ganesh & Tim Leek & Martin Rinard: +// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320, +// but it uses a full blown LLVM IR taint analysis and separate instrumentation +// to analyze all of the "attack points" at once. +// +// Workflow: +// * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation. +// * The code under test is compiled with DFSan *and* with special extra hooks +// that are inserted before dfsan. Currently supported hooks: +// - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction, +// receives the type, size and arguments of ICMP. +// * Every call to HOOK(a,b) is replaced by DFSan with +// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK +// gets all the taint labels for the arguments. 
+// * At the Fuzzer startup we assign a unique DFSan label +// to every byte of the input string (Fuzzer::CurrentUnit) so that for any +// chunk of data we know which input bytes it has derived from. +// * The __dfsw_* functions (implemented in this file) record the +// parameters (i.e. the application data and the corresponding taint labels) +// in a global state. +// * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_* +// hooks to guide the fuzzing towards new application states. +// For example if 4 bytes of data that derive from input bytes {4,5,6,7} +// are compared with a constant 12345 and the comparison always yields +// the same result, we try to insert 12345, 12344, 12346 into bytes +// {4,5,6,7} of the next fuzzed inputs. +// +// This code does not function when DFSan is not linked in. +// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer +// we redeclare the dfsan_* interface functions as weak and check if they +// are nullptr before calling. +// If this approach proves to be useful we may add attribute(weak) to the +// dfsan declarations in dfsan_interface.h +// +// This module is in the "proof of concept" stage. +// It is capable of solving only the simplest puzzles +// like test/dfsan/DFSanSimpleCmpTest.cpp. +//===----------------------------------------------------------------------===// + +/* Example of manual usage: +( + cd $LLVM/lib/Fuzzer/ + clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp + clang++ -O0 -std=c++11 -fsanitize-coverage=3 \ + -mllvm -sanitizer-coverage-experimental-trace-compares=1 \ + -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list \ + test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o + ./a.out +) +*/ + +#include "FuzzerInternal.h" +#include <sanitizer/dfsan_interface.h> + +#include <cstring> +#include <iostream> +#include <unordered_map> + +extern "C" { +__attribute__((weak)) +dfsan_label dfsan_create_label(const char *desc, void *userdata); +__attribute__((weak)) +void dfsan_set_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +void dfsan_add_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); +} // extern "C" + +namespace { + +// These values are copied from include/llvm/IR/InstrTypes.h. +// We do not include the LLVM headers here to remain independent. +// If these values ever change, an assertion in ComputeCmp will fail. 
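+// (These mirror CmpInst::Predicate in InstrTypes.h, where the FCmp
+// predicates occupy the low values and the ICmp block begins at 32; only
+// the ICmp subset is needed here.)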
+enum Predicate { + ICMP_EQ = 32, ///< equal + ICMP_NE = 33, ///< not equal + ICMP_UGT = 34, ///< unsigned greater than + ICMP_UGE = 35, ///< unsigned greater or equal + ICMP_ULT = 36, ///< unsigned less than + ICMP_ULE = 37, ///< unsigned less or equal + ICMP_SGT = 38, ///< signed greater than + ICMP_SGE = 39, ///< signed greater or equal + ICMP_SLT = 40, ///< signed less than + ICMP_SLE = 41, ///< signed less or equal +}; + +template <class U, class S> +bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) { + switch(CmpType) { + case ICMP_EQ : return Arg1 == Arg2; + case ICMP_NE : return Arg1 != Arg2; + case ICMP_UGT: return Arg1 > Arg2; + case ICMP_UGE: return Arg1 >= Arg2; + case ICMP_ULT: return Arg1 < Arg2; + case ICMP_ULE: return Arg1 <= Arg2; + case ICMP_SGT: return (S)Arg1 > (S)Arg2; + case ICMP_SGE: return (S)Arg1 >= (S)Arg2; + case ICMP_SLT: return (S)Arg1 < (S)Arg2; + case ICMP_SLE: return (S)Arg1 <= (S)Arg2; + default: assert(0 && "unsupported CmpType"); + } + return false; +} + +static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, + uint64_t Arg2) { + if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2); + if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2); + if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2); + if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2); + assert(0 && "unsupported type size"); + return true; +} + +// As a simplification we use the range of input bytes instead of a set of input +// bytes. +struct LabelRange { + uint16_t Beg, End; // Range is [Beg, End), thus Beg==End is an empty range. + + LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {} + + static LabelRange Join(LabelRange LR1, LabelRange LR2) { + if (LR1.Beg == LR1.End) return LR2; + if (LR2.Beg == LR2.End) return LR1; + return {std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End)}; + } + LabelRange &Join(LabelRange LR) { + return *this = Join(*this, LR); + } + static LabelRange Singleton(const dfsan_label_info *LI) { + uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata; + assert(Idx > 0); + return {(uint16_t)(Idx - 1), Idx}; + } +}; + +std::ostream &operator<<(std::ostream &os, const LabelRange &LR) { + return os << "[" << LR.Beg << "," << LR.End << ")"; +} + +class DFSanState { + public: + DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options) + : Options(Options) {} + + struct CmpSiteInfo { + size_t ResCounters[2] = {0, 0}; + size_t CmpSize = 0; + LabelRange LR; + std::unordered_map<uint64_t, size_t> CountedConstants; + }; + + LabelRange GetLabelRange(dfsan_label L); + void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2, dfsan_label L1, + dfsan_label L2); + bool Mutate(fuzzer::Unit *U); + + private: + std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap; + LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {}; + const fuzzer::Fuzzer::FuzzingOptions &Options; +}; + +LabelRange DFSanState::GetLabelRange(dfsan_label L) { + LabelRange &LR = LabelRanges[L]; + if (LR.Beg < LR.End || L == 0) + return LR; + const dfsan_label_info *LI = dfsan_get_label_info(L); + if (LI->l1 || LI->l2) + return LR = LabelRange::Join(GetLabelRange(LI->l1), GetLabelRange(LI->l2)); + return LR = LabelRange::Singleton(LI); +} + +void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2, dfsan_label L1, + dfsan_label L2) { + if (L1 == 0 && L2 == 0) + return; // Not actionable. 
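+  // (Editorial note: the branch below also bails out when *both* operands
+  // are tainted; such sites may still be solvable, but DFSanState::Mutate
+  // keys on exactly one constant operand per site, so only one-sided
+  // comparisons are recorded.)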
+ if (L1 != 0 && L2 != 0) + return; // Probably still actionable. + bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2); + CmpSiteInfo &CSI = PcToCmpSiteInfoMap[PC]; + CSI.CmpSize = CmpSize; + CSI.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2)); + if (!L1) CSI.CountedConstants[Arg1]++; + if (!L2) CSI.CountedConstants[Arg2]++; + size_t Counter = CSI.ResCounters[Res]++; + + if (Options.Verbosity >= 2 && + (Counter & (Counter - 1)) == 0 && + CSI.ResCounters[!Res] == 0) + std::cerr << "DFSAN:" + << " PC " << std::hex << PC << std::dec + << " S " << CmpSize + << " T " << CmpType + << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res + << " L" << L1 << GetLabelRange(L1) + << " L" << L2 << GetLabelRange(L2) + << " LR " << CSI.LR + << "\n"; +} + +bool DFSanState::Mutate(fuzzer::Unit *U) { + for (auto &PCToCmp : PcToCmpSiteInfoMap) { + auto &CSI = PCToCmp.second; + if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue; + if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue; + if (CSI.CountedConstants.size() != 1) continue; + uintptr_t C = CSI.CountedConstants.begin()->first; + if (U->size() >= CSI.CmpSize) { + size_t RangeSize = CSI.LR.End - CSI.LR.Beg; + size_t Idx = CSI.LR.Beg + rand() % RangeSize; + if (Idx + CSI.CmpSize > U->size()) continue; + C += rand() % 5 - 2; + memcpy(U->data() + Idx, &C, CSI.CmpSize); + return true; + } + } + return false; +} + +static DFSanState *DFSan; + +} // namespace + +namespace fuzzer { + +bool Fuzzer::MutateWithDFSan(Unit *U) { + if (!&dfsan_create_label || !DFSan) return false; + return DFSan->Mutate(U); +} + +void Fuzzer::InitializeDFSan() { + if (!&dfsan_create_label || !Options.UseDFSan) return; + DFSan = new DFSanState(Options); + CurrentUnit.resize(Options.MaxLen); + for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) { + dfsan_label L = dfsan_create_label("input", (void*)(i + 1)); + // We assume that no one else has called dfsan_create_label before. 
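+    // (Editorial note: userdata stores i + 1 rather than i so it is never 0,
+    // which lets LabelRange::Singleton map a label back to input byte i; the
+    // assert below checks that labels are issued sequentially from 1.)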
+ assert(L == i + 1); + dfsan_set_label(L, &CurrentUnit[i], 1); + } +} + +} // namespace fuzzer + +extern "C" { +void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, + uint64_t Arg2, dfsan_label L0, + dfsan_label L1, dfsan_label L2) { + assert(L0 == 0); + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + uint64_t CmpSize = (SizeAndType >> 32) / 8; + uint64_t Type = (SizeAndType << 32) >> 32; + DFSan->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2); +} +} // extern "C" diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 9ccd744..05a699e 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -18,6 +18,10 @@ #include <thread> #include <atomic> #include <mutex> +#include <string> +#include <sstream> +#include <algorithm> +#include <iterator> namespace fuzzer { @@ -26,19 +30,26 @@ struct FlagDescription { const char *Name; const char *Description; int Default; - int *Flag; + int *IntFlag; + const char **StrFlag; }; struct { -#define FUZZER_FLAG(Type, Name, Default, Description) Type Name; +#define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING } Flags; static FlagDescription FlagDescriptions [] { -#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name}, +#define FUZZER_FLAG_INT(Name, Default, Description) \ + { #Name, Description, Default, &Flags.Name, nullptr}, +#define FUZZER_FLAG_STRING(Name, Description) \ + { #Name, Description, 0, nullptr, &Flags.Name }, #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING }; static const size_t kNumFlags = @@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) { const char *Name = FlagDescriptions[F].Name; const char *Str = FlagValue(Param, Name); if (Str) { - int Val = std::stol(Str); - *FlagDescriptions[F].Flag = Val; - if (Flags.verbosity >= 2) - std::cerr << "Flag: " << Name << " " << Val << "\n"; - return true; + if (FlagDescriptions[F].IntFlag) { + int Val = std::stol(Str); + *FlagDescriptions[F].IntFlag = Val; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Val << "\n"; + return true; + } else if (FlagDescriptions[F].StrFlag) { + *FlagDescriptions[F].StrFlag = Str; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Str << "\n"; + return true; + } } } PrintHelp(); @@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) { // We don't use any library to minimize dependencies. static void ParseFlags(int argc, char **argv) { - for (size_t F = 0; F < kNumFlags; F++) - *FlagDescriptions[F].Flag = FlagDescriptions[F].Default; + for (size_t F = 0; F < kNumFlags; F++) { + if (FlagDescriptions[F].IntFlag) + *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].StrFlag) + *FlagDescriptions[F].StrFlag = nullptr; + } for (int A = 1; A < argc; A++) { if (ParseOneFlag(argv[A])) continue; inputs.push_back(argv[A]); @@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, return HasErrors ? 
1 : 0; } +std::vector<std::string> ReadTokensFile(const char *TokensFilePath) { + if (!TokensFilePath) return {}; + std::string TokensFileContents = FileToString(TokensFilePath); + std::istringstream ISS(TokensFileContents); + std::vector<std::string> Res = {std::istream_iterator<std::string>{ISS}, + std::istream_iterator<std::string>{}}; + Res.push_back(" "); + Res.push_back("\t"); + Res.push_back("\n"); + return Res; +} + +int ApplyTokens(const Fuzzer &F, const char *InputFilePath) { + Unit U = FileToVector(InputFilePath); + auto T = F.SubstituteTokens(U); + T.push_back(0); + std::cout << T.data(); + return 0; +} + int FuzzerDriver(int argc, char **argv, UserCallback Callback) { using namespace fuzzer; @@ -161,8 +203,10 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { Options.UseCounters = Flags.use_counters; Options.UseFullCoverageSet = Flags.use_full_coverage_set; Options.UseCoveragePairs = Flags.use_coverage_pairs; + Options.UseDFSan = Flags.dfsan; Options.PreferSmallDuringInitialShuffle = Flags.prefer_small_during_initial_shuffle; + Options.Tokens = ReadTokensFile(Flags.tokens); if (Flags.runs >= 0) Options.MaxNumberOfRuns = Flags.runs; if (!inputs.empty()) @@ -181,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { if (Flags.timeout > 0) SetTimer(Flags.timeout); + if (Flags.verbosity >= 2) { + std::cerr << "Tokens: {"; + for (auto &T : Options.Tokens) + std::cerr << T << ","; + std::cerr << "}\n"; + } + + if (Flags.apply_tokens) + return ApplyTokens(F, Flags.apply_tokens); + for (auto &inp : inputs) F.ReadDir(inp); diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 08176af..dbaf75d 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -6,41 +6,48 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// Flags. FUZZER_FLAG macro should be defined at the point of inclusion. -// We are not using any flag parsing library for better portability and -// independence. +// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the +// point of inclusion. We are not using any flag parsing library for better +// portability and independence. //===----------------------------------------------------------------------===// -FUZZER_FLAG(int, verbosity, 1, "Verbosity level.") -FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG(int, iterations, -1, +FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") +FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") +FUZZER_FLAG_INT(iterations, -1, "Number of iterations of the fuzzer internal loop" " (-1 for infinite iterations).") -FUZZER_FLAG(int, runs, -1, +FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.") -FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG(int, mutate_depth, 5, +FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.") +FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG( - int, prefer_small_during_initial_shuffle, -1, +FUZZER_FLAG_INT( + prefer_small_during_initial_shuffle, -1, "If 1, always prefer smaller inputs during the initial corpus shuffle." " If 0, never do that. 
If -1, do it sometimes.") -FUZZER_FLAG(int, exit_on_first, 0, +FUZZER_FLAG_INT(exit_on_first, 0, "If 1, exit after the first new interesting input is found.") -FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).") -FUZZER_FLAG(int, help, 0, "Print help.") -FUZZER_FLAG( - int, save_minimized_corpus, 0, +FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).") +FUZZER_FLAG_INT(help, 0, "Print help.") +FUZZER_FLAG_INT( + save_minimized_corpus, 0, "If 1, the minimized corpus is saved into the first input directory") -FUZZER_FLAG(int, use_counters, 0, "Use coverage counters") -FUZZER_FLAG(int, use_full_coverage_set, 0, +FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters") +FUZZER_FLAG_INT(use_full_coverage_set, 0, "Experimental: Maximize the number of different full" " coverage sets as opposed to maximizing the total coverage." " This is potentially MUCH slower, but may discover more paths.") -FUZZER_FLAG(int, use_coverage_pairs, 0, +FUZZER_FLAG_INT(use_coverage_pairs, 0, "Experimental: Maximize the number of different coverage pairs.") -FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" +FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") -FUZZER_FLAG(int, workers, 0, +FUZZER_FLAG_INT(workers, 0, "Number of simultaneous worker processes to run the jobs.") +FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless " + "the DFSan instrumentation was compiled in.") + +FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to" + " fuzz a token based input language.") +FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes " + " with tokens and write the result to stdout.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index 224808c..ef23d42 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) { std::istreambuf_iterator<char>()); } +std::string FileToString(const std::string &Path) { + std::ifstream T(Path); + return std::string((std::istreambuf_iterator<char>(T)), + std::istreambuf_iterator<char>()); +} + void CopyFileToErr(const std::string &Path) { std::ifstream T(Path); std::copy(std::istreambuf_iterator<char>(T), std::istreambuf_iterator<char>(), diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index e4e5eb7..7787109 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -23,7 +23,8 @@ namespace fuzzer { typedef std::vector<uint8_t> Unit; using namespace std::chrono; -Unit ReadFile(const char *Path); +std::string FileToString(const std::string &Path); +Unit FileToVector(const std::string &Path); void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); @@ -51,17 +52,17 @@ class Fuzzer { bool UseCounters = false; bool UseFullCoverageSet = false; bool UseCoveragePairs = false; + bool UseDFSan = false; int PreferSmallDuringInitialShuffle = -1; size_t MaxNumberOfRuns = ULONG_MAX; std::string OutputCorpus; + std::vector<std::string> Tokens; }; - Fuzzer(UserCallback Callback, FuzzingOptions Options) - : Callback(Callback), Options(Options) { - SetDeathCallback(); - } + Fuzzer(UserCallback Callback, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); } size_t Loop(size_t NumIterations); void 
ShuffleAndMinimize(); + void InitializeDFSan(); size_t CorpusSize() const { return Corpus.size(); } void ReadDir(const std::string &Path) { ReadDirToVectorOfUnits(Path.c_str(), &Corpus); @@ -76,20 +77,28 @@ class Fuzzer { size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } - static void AlarmCallback(); + static void StaticAlarmCallback(); + + Unit SubstituteTokens(const Unit &U) const; private: + void AlarmCallback(); + void ExecuteCallback(const Unit &U); size_t MutateAndTestOne(Unit *U); size_t RunOne(const Unit &U); size_t RunOneMaximizeTotalCoverage(const Unit &U); size_t RunOneMaximizeFullCoverageSet(const Unit &U); size_t RunOneMaximizeCoveragePairs(const Unit &U); void WriteToOutputCorpus(const Unit &U); - static void WriteToCrash(const Unit &U, const char *Prefix); + void WriteToCrash(const Unit &U, const char *Prefix); + bool MutateWithDFSan(Unit *U); + void PrintStats(const char *Where, size_t Cov, const char *End = "\n"); + void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = ""); void SetDeathCallback(); - static void DeathCallback(); - static Unit CurrentUnit; + static void StaticDeathCallback(); + void DeathCallback(); + Unit CurrentUnit; size_t TotalNumberOfRuns = 0; @@ -108,7 +117,8 @@ class Fuzzer { UserCallback Callback; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); - static system_clock::time_point UnitStartTime; + system_clock::time_point UnitStartTime; + long TimeOfLongestUnitInSeconds = 0; }; }; // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 563fbf4..9dfe30b 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -16,21 +16,49 @@ namespace fuzzer { -// static -Unit Fuzzer::CurrentUnit; -system_clock::time_point Fuzzer::UnitStartTime; +// Only one Fuzzer per process. +static Fuzzer *F; + +Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options) + : Callback(Callback), Options(Options) { + SetDeathCallback(); + InitializeDFSan(); + assert(!F); + F = this; +} void Fuzzer::SetDeathCallback() { - __sanitizer_set_death_callback(DeathCallback); + __sanitizer_set_death_callback(StaticDeathCallback); +} + +void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) { + if (Options.Tokens.empty()) { + PrintASCII(U, PrintAfter); + } else { + auto T = SubstituteTokens(U); + T.push_back(0); + std::cerr << T.data(); + std::cerr << PrintAfter; + } +} + +void Fuzzer::StaticDeathCallback() { + assert(F); + F->DeathCallback(); } void Fuzzer::DeathCallback() { std::cerr << "DEATH: " << std::endl; Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "crash-"); } +void Fuzzer::StaticAlarmCallback() { + assert(F); + F->AlarmCallback(); +} + void Fuzzer::AlarmCallback() { size_t Seconds = duration_cast<seconds>(system_clock::now() - UnitStartTime).count(); @@ -38,27 +66,40 @@ void Fuzzer::AlarmCallback() { << std::endl; if (Seconds >= 3) { Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "timeout-"); } exit(1); } +void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) { + if (!Options.Verbosity) return; + size_t Seconds = secondsSinceProcessStartUp(); + size_t ExecPerSec = (Seconds ? 
TotalNumberOfRuns / Seconds : 0); + std::cerr + << "#" << TotalNumberOfRuns + << "\t" << Where + << " cov " << Cov + << " bits " << TotalBits() + << " units " << Corpus.size() + << " exec/s " << ExecPerSec + << End; +} + void Fuzzer::ShuffleAndMinimize() { + size_t MaxCov = 0; bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 || (Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2)); if (Options.Verbosity) - std::cerr << "Shuffle: Size: " << Corpus.size() - << " prefer small: " << PreferSmall - << "\n"; + std::cerr << "PreferSmall: " << PreferSmall << "\n"; + PrintStats("READ ", 0); std::vector<Unit> NewCorpus; std::random_shuffle(Corpus.begin(), Corpus.end()); if (PreferSmall) std::stable_sort( Corpus.begin(), Corpus.end(), [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); - size_t MaxCov = 0; Unit &U = CurrentUnit; for (const auto &C : Corpus) { for (size_t First = 0; First < 1; First++) { @@ -77,18 +118,29 @@ void Fuzzer::ShuffleAndMinimize() { } } Corpus = NewCorpus; - if (Options.Verbosity) - std::cerr << "Shuffle done: " << Corpus.size() << " IC: " << MaxCov << "\n"; + PrintStats("INITED", MaxCov); } size_t Fuzzer::RunOne(const Unit &U) { UnitStartTime = system_clock::now(); TotalNumberOfRuns++; + size_t Res = 0; if (Options.UseFullCoverageSet) - return RunOneMaximizeFullCoverageSet(U); - if (Options.UseCoveragePairs) - return RunOneMaximizeCoveragePairs(U); - return RunOneMaximizeTotalCoverage(U); + Res = RunOneMaximizeFullCoverageSet(U); + else if (Options.UseCoveragePairs) + Res = RunOneMaximizeCoveragePairs(U); + else + Res = RunOneMaximizeTotalCoverage(U); + auto UnitStopTime = system_clock::now(); + auto TimeOfUnit = + duration_cast<seconds>(UnitStopTime - UnitStartTime).count(); + if (TimeOfUnit > TimeOfLongestUnitInSeconds) { + TimeOfLongestUnitInSeconds = TimeOfUnit; + std::cerr << "Longest unit: " << TimeOfLongestUnitInSeconds + << " s:\n"; + Print(U, "\n"); + } + return Res; } static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { @@ -99,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { return Res; } +Unit Fuzzer::SubstituteTokens(const Unit &U) const { + Unit Res; + for (auto Idx : U) { + if (Idx < Options.Tokens.size()) { + std::string Token = Options.Tokens[Idx]; + Res.insert(Res.end(), Token.begin(), Token.end()); + } else { + Res.push_back(' '); + } + } + // FIXME: Apply DFSan labels. + return Res; +} + +void Fuzzer::ExecuteCallback(const Unit &U) { + if (Options.Tokens.empty()) { + Callback(U.data(), U.size()); + } else { + auto T = SubstituteTokens(U); + Callback(T.data(), T.size()); + } +} + // Experimental. Does not yet scale. // Fully reset the current coverage state, run a single unit, // collect all coverage pairs and return non-zero if a new pair is observed. size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs); bool HasNewPairs = false; @@ -129,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { // e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale. 
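// (Editorial worked example for SubstituteTokens/ExecuteCallback above, not
// part of the patch: with Options.Tokens = {"if", "else", "while"}, the unit
// {0, 2, 1, 255} expands to "ifwhileelse " -- each byte indexes the token
// table and out-of-range bytes, here 255, decay to a single space -- so the
// target callback always sees the expanded text, never the raw index bytes.)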
size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs); if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second) @@ -144,21 +219,16 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { __sanitizer_update_counter_bitset_and_clear_counters(0); } size_t OldCoverage = __sanitizer_get_total_unique_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); size_t NewCoverage = __sanitizer_get_total_unique_coverage(); size_t NumNewBits = 0; if (Options.UseCounters) NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters( CounterBitmap.data()); - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) { - size_t Seconds = secondsSinceProcessStartUp(); - std::cerr - << "#" << TotalNumberOfRuns - << "\tcov: " << NewCoverage - << "\tbits: " << TotalBits() - << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n"; - } + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) + PrintStats("pulse ", NewCoverage); + if (NewCoverage > OldCoverage || NumNewBits) return NewCoverage; return 0; @@ -192,22 +262,18 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) { for (int i = 0; i < Options.MutateDepth; i++) { if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) return NewUnits; + MutateWithDFSan(U); Mutate(U, Options.MaxLen); size_t NewCoverage = RunOne(*U); if (NewCoverage) { Corpus.push_back(*U); NewUnits++; + PrintStats("NEW ", NewCoverage, ""); if (Options.Verbosity) { - std::cerr << "#" << TotalNumberOfRuns - << "\tNEW: " << NewCoverage - << " B: " << TotalBits() - << " L: " << U->size() - << " S: " << Corpus.size() - << " I: " << i - << "\t"; + std::cerr << " L: " << U->size(); if (U->size() < 30) { - PrintASCII(*U); - std::cerr << "\t"; + std::cerr << " "; + PrintUnitInASCIIOrTokens(*U, "\t"); Print(*U); } std::cerr << "\n"; diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index 679f289..3635f39 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -19,15 +19,18 @@ namespace fuzzer { void Print(const Unit &v, const char *PrintAfter) { - std::cerr << v.size() << ": "; for (auto x : v) - std::cerr << (unsigned) x << " "; + std::cerr << "0x" << std::hex << (unsigned) x << std::dec << ","; std::cerr << PrintAfter; } void PrintASCII(const Unit &U, const char *PrintAfter) { - for (auto X : U) - std::cerr << (char)((isascii(X) && X >= ' ') ? X : '?'); + for (auto X : U) { + if (isprint(X)) + std::cerr << X; + else + std::cerr << "\\x" << std::hex << (int)(unsigned)X << std::dec; + } std::cerr << PrintAfter; } @@ -43,7 +46,7 @@ std::string Hash(const Unit &in) { } static void AlarmHandler(int, siginfo_t *, void *) { - Fuzzer::AlarmCallback(); + Fuzzer::StaticAlarmCallback(); } void SetTimer(int Seconds) { diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt index e4d6b4f..79f49b5 100644 --- a/lib/Fuzzer/README.txt +++ b/lib/Fuzzer/README.txt @@ -1,112 +1,2 @@ -=============================== -Fuzzer -- a library for coverage-guided fuzz testing. -=============================== +Move to http://llvm.org/docs/LibFuzzer.html -This library is intended primarily for in-process coverage-guided fuzz testing -(fuzzing) of other libraries. The typical workflow looks like this: - - * Build the Fuzzer library as a static archive (or just a set of .o files). - Note that the Fuzzer contains the main() function. 
- Preferably do *not* use sanitizers while building the Fuzzer. - * Build the library you are going to test with -fsanitize-coverage=[234] - and one of the sanitizers. We recommend to build the library in several - different modes (e.g. asan, msan, lsan, ubsan, etc) and even using different - optimizations options (e.g. -O0, -O1, -O2) to diversify testing. - * Build a test driver using the same options as the library. - The test driver is a C/C++ file containing interesting calls to the library - inside a single function: - extern "C" void TestOneInput(const uint8_t *Data, size_t Size); - * Link the Fuzzer, the library and the driver together into an executable - using the same sanitizer options as for the library. - * Collect the initial corpus of inputs for the - fuzzer (a directory with test inputs, one file per input). - The better your inputs are the faster you will find something interesting. - Also try to keep your inputs small, otherwise the Fuzzer will run too slow. - * Run the fuzzer with the test corpus. As new interesting test cases are - discovered they will be added to the corpus. If a bug is discovered by - the sanitizer (asan, etc) it will be reported as usual and the reproducer - will be written to disk. - Each Fuzzer process is single-threaded (unless the library starts its own - threads). You can run the Fuzzer on the same corpus in multiple processes. - in parallel. For run-time options run the Fuzzer binary with '-help=1'. - - -The Fuzzer is similar in concept to AFL (http://lcamtuf.coredump.cx/afl/), -but uses in-process Fuzzing, which is more fragile, more restrictive, but -potentially much faster as it has no overhead for process start-up. -It uses LLVM's "Sanitizer Coverage" instrumentation to get in-process -coverage-feedback https://code.google.com/p/address-sanitizer/wiki/AsanCoverage - -The code resides in the LLVM repository and is (or will be) used by various -parts of LLVM, but the Fuzzer itself does not (and should not) depend on any -part of LLVM and can be used for other projects. Ideally, the Fuzzer's code -should not have any external dependencies. Right now it uses STL, which may need -to be fixed later. See also F.A.Q. below. - -Examples of usage in LLVM: - * clang-format-fuzzer. The inputs are random pieces of C++-like text. - * Build (make sure to use fresh clang as the host compiler): - cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES \ - /path/to/llvm -DCMAKE_BUILD_TYPE=Release - ninja clang-format-fuzzer - * Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc) - * TODO: commit the pre-fuzzed corpus to svn (?). - * Run: - clang-format-fuzzer CORPUS_DIR - -Toy example (see SimpleTest.cpp): -a simple function that does something interesting if it receives bytes "Hi!". - # Build the Fuzzer with asan: - % clang++ -std=c++11 -fsanitize=address -fsanitize-coverage=3 -O1 -g \ - Fuzzer*.cpp test/SimpleTest.cpp - # Run the fuzzer with no corpus (assuming on empty input) - % ./a.out - -=============================================================================== -F.A.Q. - -Q. Why Fuzzer does not use any of the LLVM support? -A. There are two reasons. -First, we want this library to be used outside of the LLVM w/o users having to -build the rest of LLVM. This may sound unconvincing for many LLVM folks, -but in practice the need for building the whole LLVM frightens many potential -users -- and we want more users to use this code. 
-Second, there is a subtle technical reason not to rely on the rest of LLVM, or -any other large body of code (maybe not even STL). When coverage instrumentation -is enabled, it will also instrument the LLVM support code which will blow up the -coverage set of the process (since the fuzzer is in-process). In other words, by -using more external dependencies we will slow down the fuzzer while the main -reason for it to exist is extreme speed. - -Q. What about Windows then? The Fuzzer contains code that does not build on -Windows. -A. The sanitizer coverage support does not work on Windows either as of 01/2015. -Once it's there, we'll need to re-implement OS-specific parts (I/O, signals). - -Q. When this Fuzzer is not a good solution for a problem? -A. - * If the test inputs are validated by the target library and the validator - asserts/crashes on invalid inputs, the in-process fuzzer is not applicable - (we could use fork() w/o exec, but it comes with extra overhead). - * Bugs in the target library may accumulate w/o being detected. E.g. a memory - corruption that goes undetected at first and then leads to a crash while - testing another input. This is why it is highly recommended to run this - in-process fuzzer with all sanitizers to detect most bugs on the spot. - * It is harder to protect the in-process fuzzer from excessive memory - consumption and infinite loops in the target library (still possible). - * The target library should not have significant global state that is not - reset between the runs. - * Many interesting target libs are not designed in a way that supports - the in-process fuzzer interface (e.g. require a file path instead of a - byte array). - * If a single test run takes a considerable fraction of a second (or - more) the speed benefit from the in-process fuzzer is negligible. - * If the target library runs persistent threads (that outlive - execution of one test) the fuzzing results will be unreliable. - -Q. So, what exactly this Fuzzer is good for? -A. This Fuzzer might be a good choice for testing libraries that have relatively -small inputs, each input takes < 1ms to run, and the library code is not expected -to crash on invalid inputs. -Examples: regular expression matchers, text or binary format parsers. diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt new file mode 100644 index 0000000..f3c4f80 --- /dev/null +++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt @@ -0,0 +1,218 @@ +# +## +` +~ +! +@ +$ +% +^ +& +* +( +) +_ +- +_ += ++ +{ +} +[ +] +| +\ +, +. +/ +? 
+> +< +; +: +' +" +++ +-- +<< +>> ++= +-= +*= +/= +>>= +<<= +&= +|= +^= +%= +!= +&& +|| +== +>= +<= +-> +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +alignas +alignof +and +and_eq +asm +auto +bitand +bitor +bool +break +case +catch +char +char16_t +char32_t +class +compl +concept +const +constexpr +const_cast +continue +decltype +default +delete +do +double +dynamic_cast +else +enum +explicit +export +extern +false +float +for +friend +goto +if +inline +int +long +mutable +namespace +new +noexcept +not +not_eq +nullptr +operator +or +or_eq +private +protected +public +register +reinterpret_cast +requires +return +short +signed +sizeof +static +static_assert +static_cast +struct +switch +template +this +thread_local +throw +true +try +typedef +typeid +typename +union +unsigned +using +virtual +void +volatile +wchar_t +while +xor +xor_eq +if +elif +else +endif +defined +ifdef +ifndef +define +undef +include +line +error +pragma +override +final diff --git a/lib/Fuzzer/dfsan_fuzzer_abi.list b/lib/Fuzzer/dfsan_fuzzer_abi.list new file mode 100644 index 0000000..7da7522 --- /dev/null +++ b/lib/Fuzzer/dfsan_fuzzer_abi.list @@ -0,0 +1,12 @@ +# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp +fun:__sanitizer_cov_trace_cmp=custom +fun:__sanitizer_cov_trace_cmp=uninstrumented + +# Ignores coverage callbacks. +fun:__sanitizer_cov=uninstrumented +fun:__sanitizer_cov=discard +fun:__sanitizer_cov_module_init=uninstrumented +fun:__sanitizer_cov_module_init=discard + +# Don't add extra parameters to the Fuzzer callback. +fun:TestOneInput=uninstrumented diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index 08130c6..fb3bf20 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -2,10 +2,11 @@ # basic blocks and we'll fail to discover the targets. # Also enable the coverage instrumentation back (it is disabled # for the Fuzzer lib) -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4") +set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4") set(Tests CounterTest + CxxTokensTest FourIndependentBranchesTest FullCoverageSetTest InfiniteTest @@ -14,11 +15,14 @@ set(Tests TimeoutTest ) +set(DFSanTests + DFSanSimpleCmpTest + ) + set(TestBinaries) foreach(Test ${Tests}) add_executable(LLVMFuzzer-${Test} - EXCLUDE_FROM_ALL ${Test}.cpp ) target_link_libraries(LLVMFuzzer-${Test} @@ -52,6 +56,13 @@ target_link_libraries(LLVMFuzzer-Unittest set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest) +add_subdirectory(dfsan) + +foreach(Test ${DFSanTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}) +endforeach() + + set_target_properties(${TestBinaries} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp new file mode 100644 index 0000000..1addccb --- /dev/null +++ b/lib/Fuzzer/test/CxxTokensTest.cpp @@ -0,0 +1,24 @@ +// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens. 
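+// (Editorial note: fuzzer.test runs this binary with
+// -tokens=%S/../cxx_fuzzer_tokens.txt, so each input byte is an index into
+// that token table, with " ", "\t" and "\n" appended as trailing tokens by
+// ReadTokensFile. The 24-character target below is then reachable as the
+// six-token sequence [thread_local][ ][unsigned][ ][A][;], i.e. six
+// well-chosen input bytes.)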
+#include <cstdint> +#include <cstdlib> +#include <cstddef> +#include <cstring> +#include <iostream> + +static void Found() { + std::cout << "Found the target, exiting\n"; + exit(1); +} + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + // looking for "thread_local unsigned A;" + if (Size < 24) return; + if (0 == memcmp(&Data[0], "thread_local", 12)) + if (Data[12] == ' ') + if (0 == memcmp(&Data[13], "unsigned", 8)) + if (Data[21] == ' ') + if (Data[22] == 'A') + if (Data[23] == ';') + Found(); +} + diff --git a/lib/Fuzzer/test/dfsan/CMakeLists.txt b/lib/Fuzzer/test/dfsan/CMakeLists.txt new file mode 100644 index 0000000..b5b874f --- /dev/null +++ b/lib/Fuzzer/test/dfsan/CMakeLists.txt @@ -0,0 +1,17 @@ +# These tests depend on both coverage and dfsan instrumentation. + +set(DFSAN_FUZZER_ABI_LIST "${CMAKE_CURRENT_SOURCE_DIR}/../../dfsan_fuzzer_abi.list") + +set(CMAKE_CXX_FLAGS_RELEASE + "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all -fsanitize=dataflow -mllvm -sanitizer-coverage-experimental-trace-compares=1 -fsanitize-blacklist=${DFSAN_FUZZER_ABI_LIST}") + +foreach(Test ${DFSanTests}) + set_source_files_properties(${Test}.cpp PROPERTIES OBJECT_DEPENDS ${DFSAN_FUZZER_ABI_LIST}) + add_executable(LLVMFuzzer-${Test} + ${Test}.cpp + ) + target_link_libraries(LLVMFuzzer-${Test} + LLVMFuzzer + ) +endforeach() + diff --git a/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp new file mode 100644 index 0000000..1162092 --- /dev/null +++ b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp @@ -0,0 +1,30 @@ +// Simple test for a fuzzer. The fuzzer must find several narrow ranges. +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <cstdio> + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) return; + uint64_t x = 0; + int64_t y = 0; + int z = 0; + unsigned short a = 0; + memcpy(&x, Data, 8); + memcpy(&y, Data + Size - 8, 8); + memcpy(&z, Data + Size / 2, sizeof(z)); + memcpy(&a, Data + Size / 2 + 4, sizeof(a)); + + if (x > 1234567890 && + x < 1234567895 && + y >= 987654321 && + y <= 987654325 && + z < -10000 && + z >= -10005 && + z != -10003 && + a == 4242) { + fprintf(stderr, "Found the target: size %zd (%zd, %zd, %d, %d), exiting.\n", + Size, x, y, z, a); + exit(1); + } +} diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test index 45691f5..2a0e95f 100644 --- a/lib/Fuzzer/test/fuzzer.test +++ b/lib/Fuzzer/test/fuzzer.test @@ -20,3 +20,9 @@ FourIndependentBranchesTest: BINGO RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest CounterTest: BINGO + +RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest +DFSanSimpleCmpTest: Found the target: + +RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest +CxxTokensTest: Found the target, exiting diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk index 2ca02f7..32ebfae 100644 --- a/lib/IR/Android.mk +++ b/lib/IR/Android.mk @@ -43,7 +43,6 @@ vmcore_SRC_FILES := \ Type.cpp \ TypeFinder.cpp \ Use.cpp \ - UseListOrder.cpp \ User.cpp \ Value.cpp \ ValueSymbolTable.cpp \ diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index ae0beba..48737b5 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -401,9 +401,7 @@ public: /// NumberedTypes - The numbered types, along with their value. 
DenseMap<StructType*, unsigned> NumberedTypes; - - TypePrinting() {} - ~TypePrinting() {} + TypePrinting() = default; void incorporateTypes(const Module &M); @@ -1443,11 +1441,11 @@ void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) { Out << FS << Name << ": "; SmallVector<unsigned, 8> SplitFlags; - unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags); + unsigned Extra = DebugNode::splitFlags(Flags, SplitFlags); FieldSeparator FlagsFS(" | "); for (unsigned F : SplitFlags) { - const char *StringF = DIDescriptor::getFlagString(F); + const char *StringF = DebugNode::getFlagString(F); assert(StringF && "Expected valid flag"); Out << FlagsFS << StringF; } @@ -1505,7 +1503,7 @@ static void writeMDSubrange(raw_ostream &Out, const MDSubrange *N, Out << "!MDSubrange("; MDFieldPrinter Printer(Out); Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false); - Printer.printInt("lowerBound", N->getLo()); + Printer.printInt("lowerBound", N->getLowerBound()); Out << ")"; } @@ -1539,16 +1537,16 @@ static void writeMDDerivedType(raw_ostream &Out, const MDDerivedType *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("baseType", N->getBaseType(), + Printer.printMetadata("baseType", N->getRawBaseType(), /* ShouldSkipNull */ false); Printer.printInt("size", N->getSizeInBits()); Printer.printInt("align", N->getAlignInBits()); Printer.printInt("offset", N->getOffsetInBits()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("extraData", N->getExtraData()); + Printer.printMetadata("extraData", N->getRawExtraData()); Out << ")"; } @@ -1559,19 +1557,19 @@ static void writeMDCompositeType(raw_ostream &Out, const MDCompositeType *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("baseType", N->getBaseType()); + Printer.printMetadata("baseType", N->getRawBaseType()); Printer.printInt("size", N->getSizeInBits()); Printer.printInt("align", N->getAlignInBits()); Printer.printInt("offset", N->getOffsetInBits()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("elements", N->getElements()); + Printer.printMetadata("elements", N->getRawElements()); Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(), dwarf::LanguageString); - Printer.printMetadata("vtableHolder", N->getVTableHolder()); - Printer.printMetadata("templateParams", N->getTemplateParams()); + Printer.printMetadata("vtableHolder", N->getRawVTableHolder()); + Printer.printMetadata("templateParams", N->getRawTemplateParams()); Printer.printString("identifier", N->getIdentifier()); Out << ")"; } @@ -1582,7 +1580,8 @@ static void writeMDSubroutineType(raw_ostream &Out, const MDSubroutineType *N, Out << "!MDSubroutineType("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("types", N->getTypeArray(), /* ShouldSkipNull */ false); + 
Printer.printMetadata("types", N->getRawTypeArray(), + /* ShouldSkipNull */ false); Out << ")"; } @@ -1604,7 +1603,7 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printDwarfEnum("language", N->getSourceLanguage(), dwarf::LanguageString, /* ShouldSkipZero */ false); - Printer.printMetadata("file", N->getFile(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false); Printer.printString("producer", N->getProducer()); Printer.printBool("isOptimized", N->isOptimized()); Printer.printString("flags", N->getFlags()); @@ -1613,11 +1612,11 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N, Printer.printString("splitDebugFilename", N->getSplitDebugFilename()); Printer.printInt("emissionKind", N->getEmissionKind(), /* ShouldSkipZero */ false); - Printer.printMetadata("enums", N->getEnumTypes()); - Printer.printMetadata("retainedTypes", N->getRetainedTypes()); - Printer.printMetadata("subprograms", N->getSubprograms()); - Printer.printMetadata("globals", N->getGlobalVariables()); - Printer.printMetadata("imports", N->getImportedEntities()); + Printer.printMetadata("enums", N->getRawEnumTypes()); + Printer.printMetadata("retainedTypes", N->getRawRetainedTypes()); + Printer.printMetadata("subprograms", N->getRawSubprograms()); + Printer.printMetadata("globals", N->getRawGlobalVariables()); + Printer.printMetadata("imports", N->getRawImportedEntities()); Out << ")"; } @@ -1628,23 +1627,23 @@ static void writeMDSubprogram(raw_ostream &Out, const MDSubprogram *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printString("linkageName", N->getLinkageName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printBool("isLocal", N->isLocalToUnit()); Printer.printBool("isDefinition", N->isDefinition()); Printer.printInt("scopeLine", N->getScopeLine()); - Printer.printMetadata("containingType", N->getContainingType()); + Printer.printMetadata("containingType", N->getRawContainingType()); Printer.printDwarfEnum("virtuality", N->getVirtuality(), dwarf::VirtualityString); Printer.printInt("virtualIndex", N->getVirtualIndex()); Printer.printDIFlags("flags", N->getFlags()); Printer.printBool("isOptimized", N->isOptimized()); - Printer.printMetadata("function", N->getFunction()); - Printer.printMetadata("templateParams", N->getTemplateParams()); - Printer.printMetadata("declaration", N->getDeclaration()); - Printer.printMetadata("variables", N->getVariables()); + Printer.printMetadata("function", N->getRawFunction()); + Printer.printMetadata("templateParams", N->getRawTemplateParams()); + Printer.printMetadata("declaration", N->getRawDeclaration()); + Printer.printMetadata("variables", N->getRawVariables()); Out << ")"; } @@ -1653,8 +1652,8 @@ static void writeMDLexicalBlock(raw_ostream &Out, const MDLexicalBlock *N, const Module *Context) { Out << "!MDLexicalBlock("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", 
N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Printer.printInt("column", N->getColumn()); Out << ")"; @@ -1667,8 +1666,8 @@ static void writeMDLexicalBlockFile(raw_ostream &Out, const Module *Context) { Out << "!MDLexicalBlockFile("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("discriminator", N->getDiscriminator(), /* ShouldSkipZero */ false); Out << ")"; @@ -1680,8 +1679,8 @@ static void writeMDNamespace(raw_ostream &Out, const MDNamespace *N, Out << "!MDNamespace("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1694,7 +1693,7 @@ static void writeMDTemplateTypeParameter(raw_ostream &Out, Out << "!MDTemplateTypeParameter("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("type", N->getType(), /* ShouldSkipNull */ false); + Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1708,7 +1707,7 @@ static void writeMDTemplateValueParameter(raw_ostream &Out, if (N->getTag() != dwarf::DW_TAG_template_value_parameter) Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1720,14 +1719,14 @@ static void writeMDGlobalVariable(raw_ostream &Out, const MDGlobalVariable *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printString("linkageName", N->getLinkageName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printBool("isLocal", N->isLocalToUnit()); Printer.printBool("isDefinition", N->isDefinition()); - Printer.printMetadata("variable", N->getVariable()); - Printer.printMetadata("declaration", N->getStaticDataMemberDeclaration()); + Printer.printMetadata("variable", N->getRawVariable()); + Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration()); Out << ")"; } @@ -1741,12 +1740,11 @@ static void writeMDLocalVariable(raw_ostream &Out, const MDLocalVariable *N, Printer.printInt("arg", N->getArg(), /* ShouldSkipZero */ N->getTag() == dwarf::DW_TAG_auto_variable); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + 
Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("inlinedAt", N->getInlinedAt()); Out << ")"; } @@ -1777,12 +1775,12 @@ static void writeMDObjCProperty(raw_ostream &Out, const MDObjCProperty *N, Out << "!MDObjCProperty("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Printer.printString("setter", N->getSetterName()); Printer.printString("getter", N->getGetterName()); Printer.printInt("attributes", N->getAttributes()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Out << ")"; } @@ -1793,8 +1791,8 @@ static void writeMDImportedEntity(raw_ostream &Out, const MDImportedEntity *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("entity", N->getEntity()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("entity", N->getRawEntity()); Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1943,16 +1941,19 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; SetVector<const Comdat *> Comdats; + bool ShouldPreserveUseListOrder; UseListOrderStack UseListOrders; public: /// Construct an AssemblyWriter with an external SlotTracker - AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, AssemblyAnnotationWriter *AAW); + AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M, + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder = false); /// Construct an AssemblyWriter with an internally allocated SlotTracker AssemblyWriter(formatted_raw_ostream &o, const Module *M, - AssemblyAnnotationWriter *AAW); + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder = false); void printMDNodeBody(const MDNode *MD); void printNamedMDNode(const NamedMDNode *NMD); @@ -2004,18 +2005,20 @@ void AssemblyWriter::init() { Comdats.insert(C); } - AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, - AssemblyAnnotationWriter *AAW) - : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW) { + const Module *M, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) + : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { init(); } AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M, - AssemblyAnnotationWriter *AAW) - : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)), - Machine(*ModuleSlotTracker), AnnotationWriter(AAW) { + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) + : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)), + Machine(*ModuleSlotTracker), AnnotationWriter(AAW), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { init(); } @@ -2103,7 +2106,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand, void AssemblyWriter::printModule(const Module *M) { Machine.initialize(); - if (shouldPreserveAssemblyUseListOrder()) + if 
(ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); if (!M->getModuleIdentifier().empty() && @@ -2778,8 +2781,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } Operand = CI->getCalledValue(); - PointerType *PTy = cast<PointerType>(Operand->getType()); - FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + FunctionType *FTy = cast<FunctionType>(CI->getFunctionType()); Type *RetTy = FTy->getReturnType(); const AttributeSet &PAL = CI->getAttributes(); @@ -2791,15 +2793,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // and if the return type is not a pointer to a function. // Out << ' '; - if (!FTy->isVarArg() && - (!RetTy->isPointerTy() || - !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { - TypePrinter.print(RetTy, Out); - Out << ' '; - writeOperand(Operand, false); - } else { - writeOperand(Operand, true); - } + TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out); + Out << ' '; + writeOperand(Operand, false); Out << '('; for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) { if (op > 0) @@ -3060,10 +3056,18 @@ void AssemblyWriter::printUseLists(const Function *F) { // External Interface declarations //===----------------------------------------------------------------------===// -void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { +void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { + SlotTracker SlotTable(this->getParent()); + formatted_raw_ostream OS(ROS); + AssemblyWriter W(OS, SlotTable, this->getParent(), AAW); + W.printFunction(this); +} + +void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) const { SlotTracker SlotTable(this); formatted_raw_ostream OS(ROS); - AssemblyWriter W(OS, SlotTable, this, AAW); + AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder); W.printModule(this); } diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 199c318..d544689 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -115,10 +115,10 @@ class IntAttributeImpl : public EnumAttributeImpl { public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { - assert( - (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment || - Kind == Attribute::Dereferenceable) && - "Wrong kind for int attribute!"); + assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment || + Kind == Attribute::Dereferenceable || + Kind == Attribute::DereferenceableOrNull) && + "Wrong kind for int attribute!"); } uint64_t getValue() const { return Val; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index daac6b5..be5b74f 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -94,6 +94,12 @@ Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context, return get(Context, Dereferenceable, Bytes); } +Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context, + uint64_t Bytes) { + assert(Bytes && "Bytes must be non-zero."); + return get(Context, DereferenceableOrNull, Bytes); +} + //===----------------------------------------------------------------------===// // Attribute Accessor Methods //===----------------------------------------------------------------------===// @@ -170,6 +176,13 @@ uint64_t Attribute::getDereferenceableBytes() const { return pImpl->getValueAsInt(); } +uint64_t Attribute::getDereferenceableOrNullBytes() const { + 
assert(hasAttribute(Attribute::DereferenceableOrNull) && + "Trying to get dereferenceable bytes from " + "non-dereferenceable attribute!"); + return pImpl->getValueAsInt(); +} + std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return ""; @@ -263,9 +276,9 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return Result; } - if (hasAttribute(Attribute::StackAlignment)) { + auto AttrWithBytesToString = [&](const char *Name) { std::string Result; - Result += "alignstack"; + Result += Name; if (InAttrGrp) { Result += "="; Result += utostr(getValueAsInt()); @@ -275,21 +288,16 @@ std::string Attribute::getAsString(bool InAttrGrp) const { Result += ")"; } return Result; - } + }; - if (hasAttribute(Attribute::Dereferenceable)) { - std::string Result; - Result += "dereferenceable"; - if (InAttrGrp) { - Result += "="; - Result += utostr(getValueAsInt()); - } else { - Result += "("; - Result += utostr(getValueAsInt()); - Result += ")"; - } - return Result; - } + if (hasAttribute(Attribute::StackAlignment)) + return AttrWithBytesToString("alignstack"); + + if (hasAttribute(Attribute::Dereferenceable)) + return AttrWithBytesToString("dereferenceable"); + + if (hasAttribute(Attribute::DereferenceableOrNull)) + return AttrWithBytesToString("dereferenceable_or_null"); // Convert target-dependent attributes to strings of the form: // @@ -298,12 +306,12 @@ std::string Attribute::getAsString(bool InAttrGrp) const { // if (isStringAttribute()) { std::string Result; - Result += '\"' + getKindAsString().str() + '"'; + Result += (Twine('"') + getKindAsString() + Twine('"')).str(); StringRef Val = pImpl->getValueAsString(); if (Val.empty()) return Result; - Result += "=\"" + Val.str() + '"'; + Result += ("=\"" + Val + Twine('"')).str(); return Result; } @@ -428,6 +436,11 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::JumpTable: return 1ULL << 45; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); + break; + case Attribute::DereferenceableOrNull: + llvm_unreachable("dereferenceable_or_null attribute not supported in raw " + "format"); + break; } llvm_unreachable("Unsupported attribute type"); } @@ -663,6 +676,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, Attrs.push_back(std::make_pair(Index, Attribute::getWithDereferenceableBytes(C, B.getDereferenceableBytes()))); + else if (Kind == Attribute::DereferenceableOrNull) + Attrs.push_back( + std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes( + C, B.getDereferenceableOrNullBytes()))); else Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind))); } @@ -842,6 +859,14 @@ AttributeSet AttributeSet::addDereferenceableAttr(LLVMContext &C, unsigned Index return addAttributes(C, Index, AttributeSet::get(C, Index, B)); } +AttributeSet AttributeSet::addDereferenceableOrNullAttr(LLVMContext &C, + unsigned Index, + uint64_t Bytes) const { + llvm::AttrBuilder B; + B.addDereferenceableOrNullAttr(Bytes); + return addAttributes(C, Index, AttributeSet::get(C, Index, B)); +} + //===----------------------------------------------------------------------===// // AttributeSet Accessor Methods //===----------------------------------------------------------------------===// @@ -1011,7 +1036,8 @@ void AttributeSet::dump() const { //===----------------------------------------------------------------------===// AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) - : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) 
{ + : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0), + DerefOrNullBytes(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; @@ -1028,7 +1054,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) void AttrBuilder::clear() { Attrs.reset(); - Alignment = StackAlignment = DerefBytes = 0; + Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { @@ -1055,6 +1081,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { StackAlignment = Attr.getStackAlignment(); else if (Kind == Attribute::Dereferenceable) DerefBytes = Attr.getDereferenceableBytes(); + else if (Kind == Attribute::DereferenceableOrNull) + DerefOrNullBytes = Attr.getDereferenceableOrNullBytes(); return *this; } @@ -1073,6 +1101,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { StackAlignment = 0; else if (Val == Attribute::Dereferenceable) DerefBytes = 0; + else if (Val == Attribute::DereferenceableOrNull) + DerefOrNullBytes = 0; return *this; } @@ -1099,6 +1129,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { StackAlignment = 0; else if (Kind == Attribute::Dereferenceable) DerefBytes = 0; + else if (Kind == Attribute::DereferenceableOrNull) + DerefOrNullBytes = 0; } else { assert(Attr.isStringAttribute() && "Invalid attribute type!"); std::map<std::string, std::string>::iterator @@ -1149,6 +1181,15 @@ AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) { return *this; } +AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) { + if (Bytes == 0) + return *this; + + Attrs[Attribute::DereferenceableOrNull] = true; + DerefOrNullBytes = Bytes; + return *this; +} + AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! 
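The new attribute reuses the dereferenceable plumbing end to end: AttrBuilder carries the byte count in DerefOrNullBytes, and AttributeSet::addDereferenceableOrNullAttr materializes it. A minimal round-trip sketch using only entry points added in this patch (Ctx and F are assumed to be an existing LLVMContext and Function*):

    // Hedged sketch, not part of the patch: attach and re-read the attribute.
    llvm::AttributeSet AS = F->getAttributes().addDereferenceableOrNullAttr(
        Ctx, /*Index=*/1, /*Bytes=*/16);      // index 1 = first parameter
    F->setAttributes(AS);

    llvm::Attribute A =
        llvm::Attribute::getWithDereferenceableOrNullBytes(Ctx, 16);
    assert(A.getDereferenceableOrNullBytes() == 16);

Semantically, dereferenceable_or_null(N) promises the pointer is either null or points at N dereferenceable bytes, which is why it joins dereferenceable in the typeIncompatible set for non-pointer arguments.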
if (!Alignment) @@ -1225,7 +1266,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { - if (I == Attribute::Dereferenceable) + if (I == Attribute::Dereferenceable || + I == Attribute::DereferenceableOrNull) continue; if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs[I] = true; @@ -1261,6 +1303,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::NonNull) .addDereferenceableAttr(1) // the int here is ignored + .addDereferenceableOrNullAttr(1) // the int here is ignored .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::StructRet) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index d2dfeaa..bb23d2c 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -124,19 +124,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } - case 'd': { - if (Name.startswith("dbg.declare") && F->arg_size() == 2) { - F->setName(Name + ".old"); - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare); - return true; - } - if (Name.startswith("dbg.value") && F->arg_size() == 3) { - F->setName(Name + ".old"); - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); - return true; - } - break; - } case 'o': // We only need to change the name to match the mangling including the @@ -354,23 +341,6 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { return false; } -static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { - if (!DbgNode || Elt >= DbgNode->getNumOperands()) - return nullptr; - return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)); -} - -static MetadataAsValue *getExpression(Value *VarOperand, Function *F) { - // Old-style DIVariables have an optional expression as the 8th element. - DIExpression Expr(getNodeField( - cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8)); - if (!Expr) { - DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false); - Expr = DIB.createExpression(); - } - return MetadataAsValue::get(F->getContext(), Expr); -} - // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them // to byte shuffles. static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, @@ -745,7 +715,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - std::string Name = CI->getName().str(); + std::string Name = CI->getName(); if (!Name.empty()) CI->setName(Name + ".old"); @@ -753,25 +723,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { default: llvm_unreachable("Unknown function for CallInst upgrade."); - // Upgrade debug intrinsics to use an additional DIExpression argument. 
- case Intrinsic::dbg_declare: { - auto NewCI = - Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), - getExpression(CI->getArgOperand(1), F), Name); - NewCI->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); - return; - } - case Intrinsic::dbg_value: { - auto NewCI = Builder.CreateCall4( - NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - getExpression(CI->getArgOperand(2), F), Name); - NewCI->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); - return; - } case Intrinsic::ctlz: case Intrinsic::cttz: assert(CI->getNumArgOperands() == 1 && diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index fe38385..23ec705 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -94,8 +94,8 @@ void BasicBlock::removeFromParent() { getParent()->getBasicBlockList().remove(this); } -void BasicBlock::eraseFromParent() { - getParent()->getBasicBlockList().erase(this); +iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() { + return getParent()->getBasicBlockList().erase(this); } /// Unlink this basic block from its current function and diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 9fef0b2..d2e0c38 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_library(LLVMCore Type.cpp TypeFinder.cpp Use.cpp - UseListOrder.cpp User.cpp Value.cpp ValueSymbolTable.cpp diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index d97d2c4..d3caf04 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -132,7 +132,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (ElTy == DPTy->getElementType()) // This GEP is inbounds because all indices are zero. - return ConstantExpr::getInBoundsGetElementPtr(V, IdxList); + return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(), + V, IdxList); } // Handle casts from one vector constant to another. We know that the src @@ -169,7 +170,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { // be the same. Consequently, we just fold to V. return V; - if (DestTy->isFloatingPointTy()) + // See note below regarding the PPC_FP128 restriction. + if (DestTy->isFloatingPointTy() && !DestTy->isPPC_FP128Ty()) return ConstantFP::get(DestTy->getContext(), APFloat(DestTy->getFltSemantics(), CI->getValue())); @@ -179,9 +181,19 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { } // Handle ConstantFP input: FP -> Integral. - if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) + if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) { + // PPC_FP128 is really the sum of two consecutive doubles, where the first + // double is always stored first in memory, regardless of the target + // endianness. The memory layout of i128, however, depends on the target + // endianness, and so we can't fold this without target endianness + // information. 
This should instead be handled by + // Analysis/ConstantFolding.cpp + if (FP->getType()->isPPC_FP128Ty()) + return nullptr; + return ConstantInt::get(FP->getContext(), FP->getValueAPF().bitcastToAPInt()); + } return nullptr; } @@ -2020,7 +2032,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (isa<UndefValue>(C)) { PointerType *Ptr = cast<PointerType>(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); + Type *Ty = GetElementPtrInst::getIndexedType( + cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace())); } @@ -2034,7 +2047,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, } if (isNull) { PointerType *Ptr = cast<PointerType>(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); + Type *Ty = GetElementPtrInst::getIndexedType( + cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); return ConstantPointerNull::get(PointerType::get(Ty, Ptr->getAddressSpace())); @@ -2107,10 +2121,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, NewIndices.push_back(Combined); NewIndices.append(Idxs.begin() + 1, Idxs.end()); - return - ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices, - inBounds && - cast<GEPOperator>(CE)->isInBounds()); + return ConstantExpr::getGetElementPtr( + cast<GEPOperator>(CE)->getSourceElementType(), CE->getOperand(0), + NewIndices, inBounds && cast<GEPOperator>(CE)->isInBounds()); } } @@ -2135,8 +2148,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (SrcArrayTy && DstArrayTy && SrcArrayTy->getElementType() == DstArrayTy->getElementType() && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace()) - return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0), - Idxs, inBounds); + return ConstantExpr::getGetElementPtr( + SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds); } } } @@ -2202,7 +2215,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (!NewIdxs.empty()) { for (unsigned i = 0, e = Idxs.size(); i != e; ++i) if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]); - return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds); + return ConstantExpr::getGetElementPtr(nullptr, C, NewIdxs, inBounds); } // If all indices are known integers and normalized, we can do a simple @@ -2210,7 +2223,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (!Unknown && !inBounds) if (auto *GV = dyn_cast<GlobalVariable>(C)) if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs)) - return ConstantExpr::getInBoundsGetElementPtr(C, Idxs); + return ConstantExpr::getInBoundsGetElementPtr(nullptr, C, Idxs); return nullptr; } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index e51a396..3f8d1f1 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -1252,7 +1252,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty, return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy); case Instruction::GetElementPtr: - return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1), + return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1), cast<GEPOperator>(this)->isInBounds(), OnlyIfReducedTy); case Instruction::ICmp: @@ -1925,7 +1925,7 @@ Constant *ConstantExpr::getSizeOf(Type* Ty) { // Note that a non-inbounds gep is used, as null isn't within any object. 
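The getSizeOf body that follows is the classic target-independent sizeof idiom: index one past a null T* and ptrtoint the result, the constant-expression form of (size_t)&((T*)0)[1]. From client code it looks like this (sketch; Ctx is an assumed LLVMContext):

    llvm::Type *Elts[] = {llvm::Type::getInt8Ty(Ctx),
                          llvm::Type::getInt64Ty(Ctx)};
    // Stays a ptrtoint(gep) constant expression until a DataLayout-aware
    // folder reduces it to a plain integer.
    llvm::Constant *Size =
        llvm::ConstantExpr::getSizeOf(llvm::StructType::get(Ctx, Elts));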
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); + Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -1939,7 +1939,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) { Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; - Constant *GEP = getGetElementPtr(NullPtr, Indices); + Constant *GEP = getGetElementPtr(AligningTy, NullPtr, Indices); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -1957,7 +1957,7 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) { FieldNo }; Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); + Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -2001,19 +2001,22 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2, return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); } -Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs, - bool InBounds, Type *OnlyIfReducedTy) { - assert(C->getType()->isPtrOrPtrVectorTy() && - "Non-pointer type for constant GetElementPtr expression"); - +Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Value *> Idxs, bool InBounds, + Type *OnlyIfReducedTy) { if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs)) return FC; // Fold a few common cases. + if (!Ty) + Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType(); + else + assert(Ty == + cast<PointerType>(C->getType()->getScalarType())->getElementType()); // Get the result type of the getelementptr! 
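This rework makes the GEP source element type an explicit parameter while keeping nullptr as a derive-from-the-pointer fallback, so during the migration both spellings build the same uniqued constant. Sketch, assuming a Constant *C of type i32* and a context Ctx:

    llvm::Constant *Idxs[] = {
        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), 0)};
    llvm::Constant *G1 =
        llvm::ConstantExpr::getGetElementPtr(nullptr, C, Idxs);
    llvm::Constant *G2 =
        llvm::ConstantExpr::getGetElementPtr(llvm::Type::getInt32Ty(Ctx), C,
                                             Idxs);
    assert(G1 == G2); // constant expressions are uniqued by type and operands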
- Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs); - assert(Ty && "GEP indices invalid!"); + Type *DestTy = GetElementPtrInst::getIndexedType(Ty, Idxs); + assert(DestTy && "GEP indices invalid!"); unsigned AS = C->getType()->getPointerAddressSpace(); - Type *ReqTy = Ty->getPointerTo(AS); + Type *ReqTy = DestTy->getPointerTo(AS); if (VectorType *VecTy = dyn_cast<VectorType>(C->getType())) ReqTy = VectorType::get(ReqTy, VecTy->getNumElements()); diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 613147e..7fe7beb 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1153,8 +1153,8 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), NumIndices); - return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal), - IdxList)); + return wrap(ConstantExpr::getGetElementPtr( + nullptr, unwrap<Constant>(ConstantVal), IdxList)); } LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, @@ -1163,7 +1163,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, Constant* Val = unwrap<Constant>(ConstantVal); ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), NumIndices); - return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList)); + return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList)); } LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { @@ -2181,14 +2181,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { MDNode *Loc = L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr; - unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc)); + unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(Loc)); } LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { LLVMContext &Context = unwrap(Builder)->getContext(); return wrap(MetadataAsValue::get( - Context, - unwrap(Builder)->getCurrentDebugLocation().getAsMDNode(Context))); + Context, unwrap(Builder)->getCurrentDebugLocation().getAsMDNode())); } void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { @@ -2513,12 +2512,13 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices); - return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name)); + return wrap( + unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, unsigned Idx, const char *Name) { - return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name)); + return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name)); } LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 9677de4..891fb86 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -74,8 +74,7 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { } void DIBuilder::finalize() { - DIArray Enums = getOrCreateArray(AllEnumTypes); - DIType(TempEnumTypes).replaceAllUsesWith(Enums); + TempEnumTypes->replaceAllUsesWith(MDTuple::get(VMContext, AllEnumTypes)); SmallVector<Metadata *, 16> RetainValues; // Declarations and definitions of the same type may be retained. 
Some @@ -86,28 +85,24 @@ void DIBuilder::finalize() { for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) if (RetainSet.insert(AllRetainTypes[I]).second) RetainValues.push_back(AllRetainTypes[I]); - DIArray RetainTypes = getOrCreateArray(RetainValues); - DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); - - DIArray SPs = getOrCreateArray(AllSubprograms); - DIType(TempSubprograms).replaceAllUsesWith(SPs); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (MDNode *Temp = SP.getVariablesNodes()) { + TempRetainTypes->replaceAllUsesWith(MDTuple::get(VMContext, RetainValues)); + + MDSubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + TempSubprograms->replaceAllUsesWith(SPs.get()); + for (auto *SP : SPs) { + if (MDTuple *Temp = SP->getVariables().get()) { const auto &PV = PreservedVariables.lookup(SP); SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); DIArray AV = getOrCreateArray(Variables); - DIType(Temp).replaceAllUsesWith(AV); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); } } - DIArray GVs = getOrCreateArray(AllGVs); - DIType(TempGVs).replaceAllUsesWith(GVs); + TempGVs->replaceAllUsesWith(MDTuple::get(VMContext, AllGVs)); - SmallVector<Metadata *, 16> RetainValuesI(AllImportedModules.begin(), - AllImportedModules.end()); - DIArray IMs = getOrCreateArray(RetainValuesI); - DIType(TempImportedModules).replaceAllUsesWith(IMs); + TempImportedModules->replaceAllUsesWith(MDTuple::get( + VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(), + AllImportedModules.end()))); // Now that all temp nodes have been replaced or deleted, resolve remaining // cycles. @@ -121,19 +116,16 @@ void DIBuilder::finalize() { } /// If N is compile unit return NULL otherwise return N. -static MDScope *getNonCompileUnitScope(MDNode *N) { +static MDScope *getNonCompileUnitScope(MDScope *N) { if (!N || isa<MDCompileUnit>(N)) return nullptr; return cast<MDScope>(N); } -DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, - StringRef Producer, bool isOptimized, - StringRef Flags, unsigned RunTimeVer, - StringRef SplitName, - DebugEmissionKind Kind, - bool EmitDebugInfo) { +MDCompileUnit *DIBuilder::createCompileUnit( + unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName, + DebugEmissionKind Kind, bool EmitDebugInfo) { assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && @@ -143,18 +135,19 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, // TODO: Once we make MDCompileUnit distinct, stop using temporaries here // (just start with operands assigned to nullptr). - TempEnumTypes = MDTuple::getTemporary(VMContext, None).release(); - TempRetainTypes = MDTuple::getTemporary(VMContext, None).release(); - TempSubprograms = MDTuple::getTemporary(VMContext, None).release(); - TempGVs = MDTuple::getTemporary(VMContext, None).release(); - TempImportedModules = MDTuple::getTemporary(VMContext, None).release(); + TempEnumTypes = MDTuple::getTemporary(VMContext, None); + TempRetainTypes = MDTuple::getTemporary(VMContext, None); + TempSubprograms = MDTuple::getTemporary(VMContext, None); + TempGVs = MDTuple::getTemporary(VMContext, None); + TempImportedModules = MDTuple::getTemporary(VMContext, None); // TODO: Switch to getDistinct(). 
We never want to merge compile units based // on contents. - MDNode *CUNode = MDCompileUnit::get( + MDCompileUnit *CUNode = MDCompileUnit::get( VMContext, Lang, MDFile::get(VMContext, Filename, Directory), Producer, - isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes, - TempRetainTypes, TempSubprograms, TempGVs, TempImportedModules); + isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes.get(), + TempRetainTypes.get(), TempSubprograms.get(), TempGVs.get(), + TempImportedModules.get()); // Create a named metadata so that it is easier to find cu in a module. // Note that we only generate this when the caller wants to actually @@ -167,141 +160,136 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, } trackIfUnresolved(CUNode); - return DICompileUnit(CUNode); + return CUNode; } -static DIImportedEntity -createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context, +static MDImportedEntity* +createImportedModule(LLVMContext &C, dwarf::Tag Tag, MDScope* Context, Metadata *NS, unsigned Line, StringRef Name, SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) { - DIImportedEntity M = MDImportedEntity::get(C, Tag, Context, NS, Line, Name); - assert(M.Verify() && "Imported module should be valid"); - AllImportedModules.emplace_back(M.get()); + auto *M = + MDImportedEntity::get(C, Tag, Context, DebugNodeRef(NS), Line, Name); + AllImportedModules.emplace_back(M); return M; } -DIImportedEntity DIBuilder::createImportedModule(DIScope Context, - DINameSpace NS, +MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context, + MDNamespace* NS, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, Context, NS, Line, StringRef(), AllImportedModules); } -DIImportedEntity DIBuilder::createImportedModule(DIScope Context, - DIImportedEntity NS, +MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context, + MDImportedEntity* NS, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, Context, NS, Line, StringRef(), AllImportedModules); } -DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, - DIDescriptor Decl, - unsigned Line, StringRef Name) { +MDImportedEntity *DIBuilder::createImportedDeclaration(MDScope *Context, + DebugNode *Decl, + unsigned Line, + StringRef Name) { // Make sure to use the unique identifier based metadata reference for // types that have one. - Metadata *V = - Decl.isType() ? 
static_cast<Metadata *>(DIType(Decl).getRef()) : Decl; return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, V, Line, Name, + Context, DebugNodeRef::get(Decl), Line, Name, AllImportedModules); } -DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, - DIImportedEntity Imp, - unsigned Line, StringRef Name) { - return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, Imp, Line, Name, AllImportedModules); -} - -DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { +MDFile* DIBuilder::createFile(StringRef Filename, StringRef Directory) { return MDFile::get(VMContext, Filename, Directory); } -DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) { +MDEnumerator *DIBuilder::createEnumerator(StringRef Name, int64_t Val) { assert(!Name.empty() && "Unable to create enumerator without name"); return MDEnumerator::get(VMContext, Val, Name); } -DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) { +MDBasicType *DIBuilder::createUnspecifiedType(StringRef Name) { assert(!Name.empty() && "Unable to create type without name"); return MDBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name); } -DIBasicType DIBuilder::createNullPtrType() { +MDBasicType *DIBuilder::createNullPtrType() { return createUnspecifiedType("decltype(nullptr)"); } -DIBasicType -DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Encoding) { +MDBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Encoding) { assert(!Name.empty() && "Unable to create type without name"); return MDBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits, AlignInBits, Encoding); } -DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { +MDDerivedType *DIBuilder::createQualifiedType(unsigned Tag, MDType *FromTy) { return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - FromTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(FromTy), 0, 0, 0, 0); } -DIDerivedType -DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, - uint64_t AlignInBits, StringRef Name) { +MDDerivedType *DIBuilder::createPointerType(MDType *PointeeTy, + uint64_t SizeInBits, + uint64_t AlignInBits, + StringRef Name) { // FIXME: Why is there a name here? 
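From this point the builder returns concrete MD* node pointers rather than DIDescriptor value wrappers, and type cross-references go through MDTypeRef::get. A small setup sketch under the new signatures (M is an assumed existing Module; later sketches in this section reuse these names):

    #include "llvm/IR/DIBuilder.h"
    using namespace llvm;

    DIBuilder DIB(M);
    MDCompileUnit *CU = DIB.createCompileUnit(
        dwarf::DW_LANG_C99, "t.c", "/tmp", "clang", /*isOptimized=*/false,
        /*Flags=*/"", /*RunTimeVer=*/0);
    MDFile *File = DIB.createFile("t.c", "/tmp");
    MDBasicType *IntTy =
        DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
    MDDerivedType *PtrTy = DIB.createPointerType(IntTy, 64, 64);
    // isa<>/cast<> on the returned nodes replace the old isType()-style
    // predicates on DIDescriptor.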
return MDDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name, - nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, - AlignInBits, 0, 0); + nullptr, 0, nullptr, MDTypeRef::get(PointeeTy), + SizeInBits, AlignInBits, 0, 0); } -DIDerivedType -DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base, - uint64_t SizeInBits, uint64_t AlignInBits) { +MDDerivedType *DIBuilder::createMemberPointerType(MDType *PointeeTy, + MDType *Base, + uint64_t SizeInBits, + uint64_t AlignInBits) { return MDDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "", - nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, - AlignInBits, 0, 0, Base.getRef()); + nullptr, 0, nullptr, MDTypeRef::get(PointeeTy), + SizeInBits, AlignInBits, 0, 0, MDTypeRef::get(Base)); } -DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { - assert(RTy.isType() && "Unable to create reference type"); +MDDerivedType *DIBuilder::createReferenceType(unsigned Tag, MDType *RTy) { + assert(RTy && "Unable to create reference type"); return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - RTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(RTy), 0, 0, 0, 0); } -DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo, DIDescriptor Context) { - return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, - File.getFileNode(), LineNo, - DIScope(getNonCompileUnitScope(Context)).getRef(), - Ty.getRef(), 0, 0, 0, 0); +MDDerivedType *DIBuilder::createTypedef(MDType *Ty, StringRef Name, + MDFile *File, unsigned LineNo, + MDScope *Context) { + return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File, + LineNo, + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(Ty), 0, 0, 0, 0); } -DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { - // typedefs are encoded in DIDerivedType format. 
- assert(Ty.isType() && "Invalid type!"); - assert(FriendTy.isType() && "Invalid friend type!"); +MDDerivedType *DIBuilder::createFriend(MDType *Ty, MDType *FriendTy) { + assert(Ty && "Invalid type!"); + assert(FriendTy && "Invalid friend type!"); return MDDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, - Ty.getRef(), FriendTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(Ty), MDTypeRef::get(FriendTy), 0, 0, + 0, 0); } -DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, - uint64_t BaseOffset, - unsigned Flags) { - assert(Ty.isType() && "Unable to create inheritance"); +MDDerivedType *DIBuilder::createInheritance(MDType *Ty, MDType *BaseTy, + uint64_t BaseOffset, + unsigned Flags) { + assert(Ty && "Unable to create inheritance"); return MDDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr, - 0, Ty.getRef(), BaseTy.getRef(), 0, 0, BaseOffset, - Flags); + 0, MDTypeRef::get(Ty), MDTypeRef::get(BaseTy), 0, 0, + BaseOffset, Flags); } -DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty) { +MDDerivedType *DIBuilder::createMemberType(MDScope *Scope, StringRef Name, + MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, MDType *Ty) { return MDDerivedType::get( VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), SizeInBits, - AlignInBits, OffsetInBits, Flags); + MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty), + SizeInBits, AlignInBits, OffsetInBits, Flags); } static ConstantAsMetadata *getConstantOrNull(Constant *C) { @@ -310,135 +298,124 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) { return nullptr; } -DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope, - StringRef Name, DIFile File, - unsigned LineNumber, DIType Ty, - unsigned Flags, - llvm::Constant *Val) { - // TAG_member is encoded in DIDerivedType format. 
- Flags |= DIDescriptor::FlagStaticMember; +MDDerivedType *DIBuilder::createStaticMemberType(MDScope *Scope, StringRef Name, + MDFile *File, + unsigned LineNumber, + MDType *Ty, unsigned Flags, + llvm::Constant *Val) { + Flags |= DebugNode::FlagStaticMember; return MDDerivedType::get( VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), 0, 0, 0, - Flags, getConstantOrNull(Val)); + MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty), 0, 0, + 0, Flags, getConstantOrNull(Val)); } -DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, MDNode *PropertyNode) { - return MDDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, - LineNumber, getNonCompileUnitScope(File), - Ty.getRef(), SizeInBits, AlignInBits, OffsetInBits, - Flags, PropertyNode); +MDDerivedType *DIBuilder::createObjCIVar(StringRef Name, MDFile *File, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + MDType *Ty, MDNode *PropertyNode) { + return MDDerivedType::get( + VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, + MDScopeRef::get(getNonCompileUnitScope(File)), MDTypeRef::get(Ty), + SizeInBits, AlignInBits, OffsetInBits, Flags, PropertyNode); } -DIObjCProperty -DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, +MDObjCProperty * +DIBuilder::createObjCProperty(StringRef Name, MDFile *File, unsigned LineNumber, StringRef GetterName, StringRef SetterName, - unsigned PropertyAttributes, DIType Ty) { + unsigned PropertyAttributes, MDType *Ty) { return MDObjCProperty::get(VMContext, Name, File, LineNumber, GetterName, SetterName, PropertyAttributes, Ty); } -DITemplateTypeParameter -DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, - DIType Ty) { - assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && - "Expected compile unit"); - return MDTemplateTypeParameter::get(VMContext, Name, Ty.getRef()); +MDTemplateTypeParameter * +DIBuilder::createTemplateTypeParameter(MDScope *Context, StringRef Name, + MDType *Ty) { + assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit"); + return MDTemplateTypeParameter::get(VMContext, Name, MDTypeRef::get(Ty)); } -static DITemplateValueParameter +static MDTemplateValueParameter * createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag, - DIDescriptor Context, StringRef Name, - DIType Ty, Metadata *MD) { - assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && - "Expected compile unit"); - return MDTemplateValueParameter::get(VMContext, Tag, Name, Ty.getRef(), MD); + MDScope *Context, StringRef Name, MDType *Ty, + Metadata *MD) { + assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit"); + return MDTemplateValueParameter::get(VMContext, Tag, Name, MDTypeRef::get(Ty), + MD); } -DITemplateValueParameter -DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, - DIType Ty, Constant *Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateValueParameter(MDScope *Context, StringRef Name, + MDType *Ty, Constant *Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_template_value_parameter, Context, Name, Ty, getConstantOrNull(Val)); } -DITemplateValueParameter -DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name, - 
DIType Ty, StringRef Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateTemplateParameter(MDScope *Context, StringRef Name, + MDType *Ty, StringRef Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty, MDString::get(VMContext, Val)); } -DITemplateValueParameter -DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name, - DIType Ty, DIArray Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateParameterPack(MDScope *Context, StringRef Name, + MDType *Ty, DIArray Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_GNU_template_parameter_pack, Context, Name, Ty, - Val); + Val.get()); } -DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, - DIType VTableHolder, - MDNode *TemplateParams, - StringRef UniqueIdentifier) { - assert((!Context || Context.isScope() || Context.isType()) && +MDCompositeType *DIBuilder::createClassType( + MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, MDType *DerivedFrom, DIArray Elements, MDType *VTableHolder, + MDNode *TemplateParams, StringRef UniqueIdentifier) { + assert((!Context || isa<MDScope>(Context)) && "createClassType should be called with a valid Context"); - // TAG_class_type is encoded in DICompositeType format. - DICompositeType R = MDCompositeType::get( + + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), - SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, 0, - VTableHolder.getRef(), TemplateParams, UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, OffsetInBits, Flags, + Elements, 0, MDTypeRef::get(VTableHolder), + cast_or_null<MDTuple>(TemplateParams), UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createStructType(DIDescriptor Context, - StringRef Name, DIFile File, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, - unsigned RunTimeLang, - DIType VTableHolder, - StringRef UniqueIdentifier) { - DICompositeType R = MDCompositeType::get( +MDCompositeType *DIBuilder::createStructType( + MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, + MDType *DerivedFrom, DIArray Elements, unsigned RunTimeLang, + MDType *VTableHolder, StringRef UniqueIdentifier) { + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), - SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang, - VTableHolder.getRef(), nullptr, UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, 0, Flags, Elements, + RunTimeLang, MDTypeRef::get(VTableHolder), nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, - DIFile 
File, unsigned LineNumber, +MDCompositeType* DIBuilder::createUnionType(MDScope * Scope, StringRef Name, + MDFile* File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, unsigned RunTimeLang, StringRef UniqueIdentifier) { - DICompositeType R = MDCompositeType::get( + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) @@ -447,21 +424,21 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, return R; } -DISubroutineType DIBuilder::createSubroutineType(DIFile File, - DITypeArray ParameterTypes, - unsigned Flags) { +MDSubroutineType *DIBuilder::createSubroutineType(MDFile *File, + DITypeArray ParameterTypes, + unsigned Flags) { return MDSubroutineType::get(VMContext, Flags, ParameterTypes); } -DICompositeType DIBuilder::createEnumerationType( - DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, +MDCompositeType *DIBuilder::createEnumerationType( + MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, - DIType UnderlyingType, StringRef UniqueIdentifier) { - DICompositeType CTy = MDCompositeType::get( + MDType *UnderlyingType, StringRef UniqueIdentifier) { + auto *CTy = MDCompositeType::get( VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), UnderlyingType.getRef(), - SizeInBits, AlignInBits, 0, 0, Elements, 0, nullptr, nullptr, - UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Scope)), + MDTypeRef::get(UnderlyingType), SizeInBits, AlignInBits, 0, 0, Elements, + 0, nullptr, nullptr, UniqueIdentifier); AllEnumTypes.push_back(CTy); if (!UniqueIdentifier.empty()) retainType(CTy); @@ -469,63 +446,66 @@ DICompositeType DIBuilder::createEnumerationType( return CTy; } -DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { +MDCompositeType *DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, + MDType *Ty, DIArray Subscripts) { auto *R = MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", - nullptr, 0, nullptr, Ty.getRef(), Size, + nullptr, 0, nullptr, MDTypeRef::get(Ty), Size, AlignInBits, 0, 0, Subscripts, 0, nullptr); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { - auto *R = MDCompositeType::get( - VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty.getRef(), - Size, AlignInBits, 0, DIType::FlagVector, Subscripts, 0, nullptr); +MDCompositeType *DIBuilder::createVectorType(uint64_t Size, + uint64_t AlignInBits, MDType *Ty, + DIArray Subscripts) { + auto *R = + MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, + nullptr, MDTypeRef::get(Ty), Size, AlignInBits, 0, + DebugNode::FlagVector, Subscripts, 0, nullptr); trackIfUnresolved(R); return R; } -static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty, - unsigned FlagsToSet) { - TempMDType NewTy = cast<MDType>(static_cast<MDNode *>(Ty))->clone(); +static MDType *createTypeWithFlags(LLVMContext &Context, MDType *Ty, + unsigned FlagsToSet) { + auto NewTy = Ty->clone(); 
NewTy->setFlags(NewTy->getFlags() | FlagsToSet); return MDNode::replaceWithUniqued(std::move(NewTy)); } -DIType DIBuilder::createArtificialType(DIType Ty) { +MDType *DIBuilder::createArtificialType(MDType *Ty) { // FIXME: Restrict this to the nodes where it's valid. - if (Ty.isArtificial()) + if (Ty->isArtificial()) return Ty; - return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial); + return createTypeWithFlags(VMContext, Ty, DebugNode::FlagArtificial); } -DIType DIBuilder::createObjectPointerType(DIType Ty) { +MDType *DIBuilder::createObjectPointerType(MDType *Ty) { // FIXME: Restrict this to the nodes where it's valid. - if (Ty.isObjectPointer()) + if (Ty->isObjectPointer()) return Ty; - unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial; + unsigned Flags = DebugNode::FlagObjectPointer | DebugNode::FlagArtificial; return createTypeWithFlags(VMContext, Ty, Flags); } -void DIBuilder::retainType(DIType T) { AllRetainTypes.emplace_back(T); } - -DIBasicType DIBuilder::createUnspecifiedParameter() { - return DIBasicType(); +void DIBuilder::retainType(MDType *T) { + assert(T && "Expected non-null type"); + AllRetainTypes.emplace_back(T); } -DICompositeType -DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, - DIFile F, unsigned Line, unsigned RuntimeLang, +MDBasicType *DIBuilder::createUnspecifiedParameter() { return nullptr; } + +MDCompositeType* +DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, MDScope * Scope, + MDFile* F, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, StringRef UniqueIdentifier) { // FIXME: Define in terms of createReplaceableForwardDecl() by calling // replaceWithUniqued(). - DICompositeType RetTy = MDCompositeType::get( - VMContext, Tag, Name, F.getFileNode(), Line, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, - AlignInBits, 0, DIDescriptor::FlagFwdDecl, nullptr, RuntimeLang, nullptr, + auto *RetTy = MDCompositeType::get( + VMContext, Tag, Name, F, Line, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits, + AlignInBits, 0, DebugNode::FlagFwdDecl, nullptr, RuntimeLang, nullptr, nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(RetTy); @@ -533,16 +513,15 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, return RetTy; } -DICompositeType DIBuilder::createReplaceableCompositeType( - unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, +MDCompositeType* DIBuilder::createReplaceableCompositeType( + unsigned Tag, StringRef Name, MDScope * Scope, MDFile* F, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, StringRef UniqueIdentifier) { - DICompositeType RetTy = - MDCompositeType::getTemporary( - VMContext, Tag, Name, F.getFileNode(), Line, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, - AlignInBits, 0, Flags, nullptr, RuntimeLang, - nullptr, nullptr, UniqueIdentifier).release(); + auto *RetTy = MDCompositeType::getTemporary( + VMContext, Tag, Name, F, Line, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, + SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang, + nullptr, nullptr, UniqueIdentifier).release(); if (!UniqueIdentifier.empty()) retainType(RetTy); trackIfUnresolved(RetTy); @@ -550,102 +529,102 @@ DICompositeType DIBuilder::createReplaceableCompositeType( } DIArray DIBuilder::getOrCreateArray(ArrayRef<Metadata *> Elements) { - return DIArray(MDNode::get(VMContext, 
Elements)); + return MDTuple::get(VMContext, Elements); } DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) { SmallVector<llvm::Metadata *, 16> Elts; for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] && isa<MDNode>(Elements[i])) - Elts.push_back(DIType(cast<MDNode>(Elements[i])).getRef()); + Elts.push_back(MDTypeRef::get(cast<MDType>(Elements[i]))); else Elts.push_back(Elements[i]); } return DITypeArray(MDNode::get(VMContext, Elts)); } -DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { +MDSubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { return MDSubrange::get(VMContext, Count, Lo); } -static void checkGlobalVariableScope(DIDescriptor Context) { - MDNode *TheCtx = getNonCompileUnitScope(Context); - if (DIScope(TheCtx).isCompositeType()) { - assert(!DICompositeType(TheCtx).getIdentifier() && +static void checkGlobalVariableScope(MDScope * Context) { +#ifndef NDEBUG + if (auto *CT = + dyn_cast_or_null<MDCompositeType>(getNonCompileUnitScope(Context))) + assert(CT->getIdentifier().empty() && "Context of a global variable should not be a type with identifier"); - } +#endif } -DIGlobalVariable DIBuilder::createGlobalVariable( - DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, - unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val, +MDGlobalVariable *DIBuilder::createGlobalVariable( + MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F, + unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val, MDNode *Decl) { checkGlobalVariableScope(Context); - auto *N = MDGlobalVariable::get(VMContext, Context, Name, LinkageName, F, - LineNumber, Ty, isLocalToUnit, true, - getConstantOrNull(Val), Decl); + auto *N = MDGlobalVariable::get(VMContext, cast_or_null<MDScope>(Context), + Name, LinkageName, F, LineNumber, + MDTypeRef::get(Ty), isLocalToUnit, true, Val, + cast_or_null<MDDerivedType>(Decl)); AllGVs.push_back(N); return N; } -DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl( - DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, - unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val, +MDGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( + MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F, + unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val, MDNode *Decl) { checkGlobalVariableScope(Context); - return MDGlobalVariable::getTemporary(VMContext, Context, Name, LinkageName, - F, LineNumber, Ty, isLocalToUnit, false, - getConstantOrNull(Val), Decl).release(); + return MDGlobalVariable::getTemporary( + VMContext, cast_or_null<MDScope>(Context), Name, LinkageName, F, + LineNumber, MDTypeRef::get(Ty), isLocalToUnit, false, Val, + cast_or_null<MDDerivedType>(Decl)) + .release(); } -DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, - StringRef Name, DIFile File, - unsigned LineNo, DITypeRef Ty, - bool AlwaysPreserve, unsigned Flags, - unsigned ArgNo) { +MDLocalVariable *DIBuilder::createLocalVariable( + unsigned Tag, MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNo, + MDType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? // FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? 
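Variable construction follows suit; note that createLocalVariable still takes the DWARF tag directly. Continuing the earlier sketch (SP is an MDSubprogram for the enclosing function, created as in the function sketch further down):

    MDGlobalVariable *GV = DIB.createGlobalVariable(
        CU, "g", /*LinkageName=*/"g", File, /*LineNumber=*/1, IntTy,
        /*isLocalToUnit=*/false, /*Val=*/nullptr);
    MDLocalVariable *LV = DIB.createLocalVariable(
        dwarf::DW_TAG_auto_variable, SP, "x", File, /*LineNo=*/7, IntTy);
    // A local's scope must resolve to a subprogram or lexical block; a
    // compile unit is filtered out by getNonCompileUnitScope() above.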
- DIDescriptor Context(getNonCompileUnitScope(Scope)); - assert((!Context || Context.isScope()) && - "createLocalVariable should be called with a valid Context"); + MDScope* Context = getNonCompileUnitScope(Scope); - auto *Node = - MDLocalVariable::get(VMContext, Tag, getNonCompileUnitScope(Scope), Name, - File, LineNo, Ty, ArgNo, Flags); + auto *Node = MDLocalVariable::get( + VMContext, Tag, cast_or_null<MDLocalScope>(Context), Name, File, LineNo, + MDTypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a // named mdnode. - DISubprogram Fn(getDISubprogram(Scope)); + MDSubprogram *Fn = getDISubprogram(Scope); assert(Fn && "Missing subprogram for local variable"); PreservedVariables[Fn].emplace_back(Node); } return Node; } -DIExpression DIBuilder::createExpression(ArrayRef<uint64_t> Addr) { +MDExpression* DIBuilder::createExpression(ArrayRef<uint64_t> Addr) { return MDExpression::get(VMContext, Addr); } -DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) { +MDExpression* DIBuilder::createExpression(ArrayRef<int64_t> Signed) { // TODO: Remove the callers of this signed version and delete. SmallVector<uint64_t, 8> Addr(Signed.begin(), Signed.end()); return createExpression(Addr); } -DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, +MDExpression* DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, unsigned SizeInBytes) { uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes}; return MDExpression::get(VMContext, Addr); } -DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* DIBuilder::createFunction(DIScopeRef Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, @@ -658,20 +637,21 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name, Flags, isOptimized, Fn, TParams, Decl); } -DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* DIBuilder::createFunction(MDScope * Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { - assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && "function types should be subroutines"); auto *Node = MDSubprogram::get( - VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, - LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, nullptr, 0, 0, Flags, isOptimized, getConstantOrNull(Fn), - TParams, Decl, MDNode::getTemporary(VMContext, None).release()); + VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, + isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized, + Fn, cast_or_null<MDTuple>(TParams), cast_or_null<MDSubprogram>(Decl), + MDTuple::getTemporary(VMContext, None).release()); if (isDefinition) AllSubprograms.push_back(Node); @@ -679,78 +659,64 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, 
StringRef Name, return Node; } -DISubprogram -DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* +DIBuilder::createTempFunctionFwdDecl(MDScope * Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { return MDSubprogram::getTemporary( - VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, - LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, - isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized, - getConstantOrNull(Fn), TParams, Decl, nullptr).release(); -} - -DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, - unsigned LineNo, DICompositeType Ty, - bool isLocalToUnit, bool isDefinition, - unsigned VK, unsigned VIndex, - DIType VTableHolder, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParam) { - assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, + isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags, + isOptimized, Fn, cast_or_null<MDTuple>(TParams), + cast_or_null<MDSubprogram>(Decl), nullptr).release(); +} + +MDSubprogram * +DIBuilder::createMethod(MDScope *Context, StringRef Name, StringRef LinkageName, + MDFile *F, unsigned LineNo, MDSubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, unsigned VK, + unsigned VIndex, MDType *VTableHolder, unsigned Flags, + bool isOptimized, Function *Fn, MDNode *TParam) { + assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && "function types should be subroutines"); assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); // FIXME: Do we want to use different scope/lines? 
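The function entry points keep their shape minus the wrappers. Sketch with the same assumed objects as before:

    MDSubroutineType *FnTy =
        DIB.createSubroutineType(File, DIB.getOrCreateTypeArray(None));
    MDSubprogram *SP = DIB.createFunction(
        CU, "foo", /*LinkageName=*/"foo", File, /*LineNo=*/3, FnTy,
        /*isLocalToUnit=*/false, /*isDefinition=*/true, /*ScopeLine=*/3);
    // isDefinition=true appends SP to AllSubprograms so finalize() can
    // splice it into the compile unit's subprogram list.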
- auto *Node = MDSubprogram::get( - VMContext, DIScope(Context).getRef(), Name, LinkageName, F.getFileNode(), - LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder.getRef(), - VK, VIndex, Flags, isOptimized, getConstantOrNull(Fn), TParam, nullptr, - nullptr); + auto *SP = MDSubprogram::get( + VMContext, MDScopeRef::get(cast<MDScope>(Context)), Name, LinkageName, F, + LineNo, Ty, isLocalToUnit, isDefinition, LineNo, + MDTypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn, + cast_or_null<MDTuple>(TParam), nullptr, nullptr); if (isDefinition) - AllSubprograms.push_back(Node); - DISubprogram S(Node); - assert(S.isSubprogram() && "createMethod should return a valid DISubprogram"); - trackIfUnresolved(S); - return S; -} - -DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNo) { - DINameSpace R = MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), - File.getFileNode(), Name, LineNo); - assert(R.Verify() && - "createNameSpace should return a verifiable DINameSpace"); - return R; + AllSubprograms.push_back(SP); + trackIfUnresolved(SP); + return SP; +} + +MDNamespace* DIBuilder::createNameSpace(MDScope * Scope, StringRef Name, + MDFile* File, unsigned LineNo) { + return MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name, + LineNo); } -DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, - DIFile File, +MDLexicalBlockFile* DIBuilder::createLexicalBlockFile(MDScope * Scope, + MDFile* File, unsigned Discriminator) { - DILexicalBlockFile R = MDLexicalBlockFile::get( - VMContext, Scope, File.getFileNode(), Discriminator); - assert( - R.Verify() && - "createLexicalBlockFile should return a verifiable DILexicalBlockFile"); - return R; + return MDLexicalBlockFile::get(VMContext, Scope, File, Discriminator); } -DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, +MDLexicalBlock* DIBuilder::createLexicalBlock(MDScope * Scope, MDFile* File, unsigned Line, unsigned Col) { // Make these distinct, to avoid merging two lexical blocks on the same // file/line/column. 
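getDistinct is the load-bearing detail in createLexicalBlock below: uniqued nodes with identical operands collapse into a single node, while distinct nodes never merge. Illustration (Ctx, SP, File assumed as above):

    auto *BlkA =
        MDLexicalBlock::getDistinct(Ctx, SP, File, /*Line=*/10, /*Col=*/1);
    auto *BlkB =
        MDLexicalBlock::getDistinct(Ctx, SP, File, /*Line=*/10, /*Col=*/1);
    assert(BlkA != BlkB); // same file/line/column, still two blocks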
- DILexicalBlock R = MDLexicalBlock::getDistinct( - VMContext, getNonCompileUnitScope(Scope), File.getFileNode(), Line, Col); - assert(R.Verify() && - "createLexicalBlock should return a verifiable DILexicalBlock"); - return R; + return MDLexicalBlock::getDistinct(VMContext, getNonCompileUnitScope(Scope), + File, Line, Col); } static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) { @@ -758,11 +724,19 @@ static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) { return MetadataAsValue::get(VMContext, ValueAsMetadata::get(V)); } -Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - DIExpression Expr, +static Instruction *withDebugLoc(Instruction *I, const MDLocation *DL) { + I->setDebugLoc(const_cast<MDLocation *>(DL)); + return I; +} + +Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo, + MDExpression* Expr, const MDLocation *DL, Instruction *InsertBefore) { - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.declare"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -771,14 +745,17 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(DeclareFn, Args, "", InsertBefore); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertBefore), DL); } -Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - DIExpression Expr, +Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo, + MDExpression* Expr, const MDLocation *DL, BasicBlock *InsertAtEnd) { - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.declare"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -791,18 +768,21 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, // If this block already has a terminator then insert this intrinsic // before the terminator. 
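The insert* entry points now take the MDLocation up front and stamp it onto the created intrinsic via withDebugLoc, so a declare can no longer be emitted without a location whose scope agrees with the variable's subprogram. Usage sketch (Alloca, LV, SP, and a basic block BB are assumed; LV as created earlier):

    DIB.insertDeclare(Alloca, LV, DIB.createExpression(),
                      MDLocation::get(M.getContext(), /*Line=*/7, /*Col=*/0,
                                      SP),
                      BB);
    // The added asserts compare DL->getScope()->getSubprogram() with the
    // variable's subprogram, catching scope mismatches at creation time.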
if (TerminatorInst *T = InsertAtEnd->getTerminator()) - return CallInst::Create(DeclareFn, Args, "", T); - else - return CallInst::Create(DeclareFn, Args, "", InsertAtEnd); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", T), DL); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertAtEnd), DL); } Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - DIExpression Expr, + MDLocalVariable* VarInfo, + MDExpression* Expr, + const MDLocation *DL, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.value"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); @@ -812,16 +792,20 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, ConstantInt::get(Type::getInt64Ty(VMContext), Offset), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(ValueFn, Args, "", InsertBefore); + return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertBefore), DL); } Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - DIExpression Expr, + MDLocalVariable* VarInfo, + MDExpression* Expr, + const MDLocation *DL, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.value"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); @@ -831,11 +815,16 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, ConstantInt::get(Type::getInt64Ty(VMContext), Offset), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(ValueFn, Args, "", InsertAtEnd); + + return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertAtEnd), DL); } -void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHolder) { - T.setContainingType(VTableHolder); +void DIBuilder::replaceVTableHolder(MDCompositeType* &T, MDCompositeType* VTableHolder) { + { + TypedTrackingMDRef<MDCompositeType> N(T); + N->replaceVTableHolder(MDTypeRef::get(VTableHolder)); + T = N.get(); + } // If this didn't create a self-reference, just return. if (T != VTableHolder) @@ -849,9 +838,16 @@ void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHo trackIfUnresolved(N); } -void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements, +void DIBuilder::replaceArrays(MDCompositeType* &T, DIArray Elements, DIArray TParams) { - T.setArrays(Elements, TParams); + { + TypedTrackingMDRef<MDCompositeType> N(T); + if (Elements) + N->replaceElements(Elements); + if (TParams) + N->replaceTemplateParams(MDTemplateParameterArray(TParams)); + T = N.get(); + } // If T isn't resolved, there's no problem. 
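// The TypedTrackingMDRef in replaceVTableHolder/replaceArrays guards
// against the mutation re-uniquing the node: the tracking ref follows any
// uniquing RAUW, and T is reassigned from it afterwards. The same pattern
// in isolation, assuming an existing MDCompositeType *T:
//
//   TypedTrackingMDRef<MDCompositeType> N(T);
//   N->replaceElements(Elements);  // may replace the node via uniquing
//   T = N.get();                   // observe the possibly-new pointer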
if (!T->isResolved()) @@ -861,7 +857,7 @@ void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements, // arrays explicitly if they're unresolved, or else the cycles will be // orphaned. if (Elements) - trackIfUnresolved(Elements); + trackIfUnresolved(Elements.get()); if (TParams) - trackIfUnresolved(TParams); + trackIfUnresolved(TParams.get()); } diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 9a6b953..719c28b 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -25,6 +24,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -33,612 +33,62 @@ using namespace llvm; using namespace llvm::dwarf; -//===----------------------------------------------------------------------===// -// DIDescriptor -//===----------------------------------------------------------------------===// - -unsigned DIDescriptor::getFlag(StringRef Flag) { - return StringSwitch<unsigned>(Flag) -#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME) -#include "llvm/IR/DebugInfoFlags.def" - .Default(0); -} - -const char *DIDescriptor::getFlagString(unsigned Flag) { - switch (Flag) { - default: - return ""; -#define HANDLE_DI_FLAG(ID, NAME) \ - case Flag##NAME: \ - return "DIFlag" #NAME; -#include "llvm/IR/DebugInfoFlags.def" - } -} - -unsigned DIDescriptor::splitFlags(unsigned Flags, - SmallVectorImpl<unsigned> &SplitFlags) { - // Accessibility flags need to be specially handled, since they're packed - // together. 
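// These flag helpers are not lost: the DebugInfoMetadata.cpp hunk later in
// this patch re-adds them as DebugNode::getFlag/getFlagString/splitFlags.
// Expected round-trip through those definitions:
//
//   unsigned F = DebugNode::getFlag("DIFlagPrivate");
//   const char *S = DebugNode::getFlagString(F);  // "DIFlagPrivate"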
- if (unsigned A = Flags & FlagAccessibility) { - if (A == FlagPrivate) - SplitFlags.push_back(FlagPrivate); - else if (A == FlagProtected) - SplitFlags.push_back(FlagProtected); - else - SplitFlags.push_back(FlagPublic); - Flags &= ~A; - } - -#define HANDLE_DI_FLAG(ID, NAME) \ - if (unsigned Bit = Flags & ID) { \ - SplitFlags.push_back(Bit); \ - Flags &= ~Bit; \ - } -#include "llvm/IR/DebugInfoFlags.def" - - return Flags; -} - -bool DIDescriptor::Verify() const { - return DbgNode && - (DIDerivedType(DbgNode).Verify() || - DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() || - DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() || - DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() || - DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() || - DILexicalBlock(DbgNode).Verify() || - DILexicalBlockFile(DbgNode).Verify() || - DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() || - DIObjCProperty(DbgNode).Verify() || - DITemplateTypeParameter(DbgNode).Verify() || - DITemplateValueParameter(DbgNode).Verify() || - DIImportedEntity(DbgNode).Verify()); -} - -static Metadata *getField(const MDNode *DbgNode, unsigned Elt) { - if (!DbgNode || Elt >= DbgNode->getNumOperands()) - return nullptr; - return DbgNode->getOperand(Elt); -} - -static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { - return dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)); -} - -static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { - if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt))) - return MDS->getString(); - return StringRef(); -} - -StringRef DIDescriptor::getStringField(unsigned Elt) const { - return ::getStringField(DbgNode, Elt); -} - -uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (auto *C = getConstantField(Elt)) - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return CI->getZExtValue(); - - return 0; -} - -int64_t DIDescriptor::getInt64Field(unsigned Elt) const { - if (auto *C = getConstantField(Elt)) - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return CI->getZExtValue(); - - return 0; -} - -DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - MDNode *Field = getNodeField(DbgNode, Elt); - return DIDescriptor(Field); -} - -GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - return dyn_cast_or_null<GlobalVariable>(getConstantField(Elt)); -} - -Constant *DIDescriptor::getConstantField(unsigned Elt) const { - if (!DbgNode) - return nullptr; - - if (Elt < DbgNode->getNumOperands()) - if (auto *C = - dyn_cast_or_null<ConstantAsMetadata>(DbgNode->getOperand(Elt))) - return C->getValue(); - return nullptr; -} - -Function *DIDescriptor::getFunctionField(unsigned Elt) const { - return dyn_cast_or_null<Function>(getConstantField(Elt)); -} - -/// \brief Return the size reported by the variable's type. -unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) { - DIType Ty = getType().resolve(Map); - // Follow derived types until we reach a type that - // reports back a size. 
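// The deleted index-based accessors (getStringField(Elt), getNodeField,
// getConstantField, ...) give way to typed accessors on the node classes
// themselves; for example, assuming an MDSubprogram *SP:
//
//   StringRef Name = SP->getName();     // instead of getStringField(...)
//   Metadata *Type = SP->getRawType();  // instead of getNodeField(...)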
- while (Ty.isDerivedType() && !Ty.getSizeInBits()) { - DIDerivedType DT(&*Ty); - Ty = DT.getTypeDerivedFrom().resolve(Map); - } - assert(Ty.getSizeInBits() && "type with size 0"); - return Ty.getSizeInBits(); -} - -bool DIExpression::isBitPiece() const { - unsigned N = getNumElements(); - return N >=3 && getElement(N-3) == dwarf::DW_OP_bit_piece; -} - -uint64_t DIExpression::getBitPieceOffset() const { - assert(isBitPiece() && "not a piece"); - return getElement(getNumElements()-2); -} - -uint64_t DIExpression::getBitPieceSize() const { - assert(isBitPiece() && "not a piece"); - return getElement(getNumElements()-1); -} - -DIExpression::iterator DIExpression::Operand::getNext() const { - iterator it(I); - return ++it; -} - -//===----------------------------------------------------------------------===// -// Simple Descriptor Constructors and other Methods -//===----------------------------------------------------------------------===// - -void DIDescriptor::replaceAllUsesWith(LLVMContext &, DIDescriptor D) { - assert(DbgNode && "Trying to replace an unverified type!"); - assert(DbgNode->isTemporary() && "Expected temporary node"); - TempMDNode Temp(get()); - - // Since we use a TrackingVH for the node, its easy for clients to manufacture - // legitimate situations where they want to replaceAllUsesWith() on something - // which, due to uniquing, has merged with the source. We shield clients from - // this detail by allowing a value to be replaced with replaceAllUsesWith() - // itself. - if (Temp.get() == D.get()) { - DbgNode = MDNode::replaceWithUniqued(std::move(Temp)); - return; - } - - Temp->replaceAllUsesWith(D.get()); - DbgNode = D.get(); -} - -void DIDescriptor::replaceAllUsesWith(MDNode *D) { - assert(DbgNode && "Trying to replace an unverified type!"); - assert(DbgNode != D && "This replacement should always happen"); - assert(DbgNode->isTemporary() && "Expected temporary node"); - TempMDNode Node(get()); - Node->replaceAllUsesWith(D); -} - -bool DICompileUnit::Verify() const { - if (!isCompileUnit()) - return false; - - // Don't bother verifying the compilation directory or producer string - // as those could be empty. - return !getFilename().empty(); -} - -bool DIObjCProperty::Verify() const { return isObjCProperty(); } - -/// \brief Check if a value can be a reference to a type. -static bool isTypeRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDType>(MD); -} - -/// \brief Check if a value can be a ScopeRef. -static bool isScopeRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDScope>(MD); -} - -#ifndef NDEBUG -/// \brief Check if a value can be a DescriptorRef. -static bool isDescriptorRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDNode>(MD); -} -#endif - -bool DIType::Verify() const { - auto *N = dyn_cast_or_null<MDType>(DbgNode); - if (!N) - return false; - if (!isScopeRef(N->getScope())) - return false; - - // DIType is abstract, it should be a BasicType, a DerivedType or - // a CompositeType. - if (isBasicType()) - return DIBasicType(DbgNode).Verify(); - - // FIXME: Sink this into the various subclass verifies. - if (getFilename().empty()) { - // Check whether the filename is allowed to be empty. 
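// The removed DIExpression bit-piece queries also survive: the
// DebugInfoMetadata.cpp hunk below re-adds them on MDExpression. Usage
// sketch, assuming an MDExpression *Expr:
//
//   if (Expr->isBitPiece()) {
//     uint64_t Off = Expr->getBitPieceOffset();
//     uint64_t Size = Expr->getBitPieceSize();
//   }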
- uint16_t Tag = getTag(); - if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_pointer_type && - Tag != dwarf::DW_TAG_ptr_to_member_type && - Tag != dwarf::DW_TAG_reference_type && - Tag != dwarf::DW_TAG_rvalue_reference_type && - Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type && - Tag != dwarf::DW_TAG_enumeration_type && - Tag != dwarf::DW_TAG_subroutine_type && - Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend && - Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_member && - Tag != dwarf::DW_TAG_typedef) - return false; - } - - if (isCompositeType()) - return DICompositeType(DbgNode).Verify(); - if (isDerivedType()) - return DIDerivedType(DbgNode).Verify(); - return false; -} - -bool DIBasicType::Verify() const { - return dyn_cast_or_null<MDBasicType>(DbgNode); -} - -bool DIDerivedType::Verify() const { - auto *N = dyn_cast_or_null<MDDerivedTypeBase>(DbgNode); - if (!N) - return false; - if (getTag() == dwarf::DW_TAG_ptr_to_member_type) { - auto *D = dyn_cast<MDDerivedType>(N); - if (!D) - return false; - if (!isTypeRef(D->getExtraData())) - return false; - } - return isTypeRef(N->getBaseType()); -} - -bool DICompositeType::Verify() const { - auto *N = dyn_cast_or_null<MDCompositeTypeBase>(DbgNode); - return N && isTypeRef(N->getBaseType()) && isTypeRef(N->getVTableHolder()) && - !(isLValueReference() && isRValueReference()); -} - -bool DISubprogram::Verify() const { - auto *N = dyn_cast_or_null<MDSubprogram>(DbgNode); - if (!N) - return false; - - if (!isScopeRef(N->getScope())) - return false; - - if (auto *Op = N->getType()) - if (!isa<MDNode>(Op)) - return false; - - if (!isTypeRef(getContainingType())) - return false; - - if (isLValueReference() && isRValueReference()) - return false; - - // If a DISubprogram has an llvm::Function*, then scope chains from all - // instructions within the function should lead to this DISubprogram. - if (auto *F = getFunction()) { - for (auto &BB : *F) { - for (auto &I : BB) { - DebugLoc DL = I.getDebugLoc(); - if (DL.isUnknown()) - continue; - - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - // walk the inlined-at scopes - while ((IA = DL.getInlinedAt())) - DL = DebugLoc::getFromDILocation(IA); - DL.getScopeAndInlinedAt(Scope, IA); - if (!Scope) - return false; - assert(!IA); - while (!DIDescriptor(Scope).isSubprogram()) { - DILexicalBlockFile D(Scope); - Scope = D.isLexicalBlockFile() - ? 
D.getScope() - : DebugLoc::getFromDILexicalBlock(Scope).getScope(); - if (!Scope) - return false; - } - if (!DISubprogram(Scope).describes(F)) - return false; - } - } - } - - return true; -} - -bool DIGlobalVariable::Verify() const { - auto *N = dyn_cast_or_null<MDGlobalVariable>(DbgNode); - - if (!N) - return false; - - if (N->getDisplayName().empty()) - return false; - - if (auto *Op = N->getScope()) - if (!isa<MDNode>(Op)) - return false; - - if (auto *Op = N->getStaticDataMemberDeclaration()) - if (!isa<MDNode>(Op)) - return false; - - return isTypeRef(N->getType()); -} - -bool DIVariable::Verify() const { - auto *N = dyn_cast_or_null<MDLocalVariable>(DbgNode); - - if (!N) - return false; - - if (auto *Op = N->getScope()) - if (!isa<MDNode>(Op)) - return false; - - return isTypeRef(N->getType()); -} - -bool DILocation::Verify() const { - return dyn_cast_or_null<MDLocation>(DbgNode); -} -bool DINameSpace::Verify() const { - return dyn_cast_or_null<MDNamespace>(DbgNode); -} -bool DIFile::Verify() const { return dyn_cast_or_null<MDFile>(DbgNode); } -bool DIEnumerator::Verify() const { - return dyn_cast_or_null<MDEnumerator>(DbgNode); -} -bool DISubrange::Verify() const { - return dyn_cast_or_null<MDSubrange>(DbgNode); -} -bool DILexicalBlock::Verify() const { - return dyn_cast_or_null<MDLexicalBlock>(DbgNode); -} -bool DILexicalBlockFile::Verify() const { - return dyn_cast_or_null<MDLexicalBlockFile>(DbgNode); -} -bool DITemplateTypeParameter::Verify() const { - return dyn_cast_or_null<MDTemplateTypeParameter>(DbgNode); -} -bool DITemplateValueParameter::Verify() const { - return dyn_cast_or_null<MDTemplateValueParameter>(DbgNode); -} -bool DIImportedEntity::Verify() const { - return dyn_cast_or_null<MDImportedEntity>(DbgNode); -} - -void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) { - TypedTrackingMDRef<MDCompositeTypeBase> N(get()); - if (Elements) - N->replaceElements(cast<MDTuple>(Elements)); - if (TParams) - N->replaceTemplateParams(cast<MDTuple>(TParams)); - DbgNode = N; -} - -DIScopeRef DIScope::getRef() const { - if (!isCompositeType()) - return DIScopeRef(*this); - DICompositeType DTy(DbgNode); - if (!DTy.getIdentifier()) - return DIScopeRef(*this); - return DIScopeRef(DTy.getIdentifier()); -} - -void DICompositeType::setContainingType(DICompositeType ContainingType) { - TypedTrackingMDRef<MDCompositeTypeBase> N(get()); - N->replaceVTableHolder(ContainingType.getRef()); - DbgNode = N; -} - -bool DIVariable::isInlinedFnArgument(const Function *CurFn) { - assert(CurFn && "Invalid function"); - if (!getContext().isSubprogram()) - return false; - // This variable is not inlined function argument if its scope - // does not describe current function. 
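// With the wrappers gone, these per-class Verify() predicates reduce to
// ordinary RTTI on the node hierarchy; e.g., assuming an MDNode *N and a
// hypothetical consumer processVariable:
//
//   if (auto *Var = dyn_cast_or_null<MDLocalVariable>(N))
//     processVariable(Var);  // replaces DIVariable(N).Verify()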
- return !DISubprogram(getContext()).describes(CurFn); -} - -Function *DISubprogram::getFunction() const { - if (auto *N = get()) - if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getFunction())) - return dyn_cast<Function>(C->getValue()); - return nullptr; -} - -bool DISubprogram::describes(const Function *F) { - assert(F && "Invalid function"); - if (F == getFunction()) - return true; - StringRef Name = getLinkageName(); - if (Name.empty()) - Name = getName(); - if (F->getName() == Name) - return true; - return false; -} - -GlobalVariable *DIGlobalVariable::getGlobal() const { - return dyn_cast_or_null<GlobalVariable>(getConstant()); -} - -DIScopeRef DIScope::getContext() const { - - if (isType()) - return DIType(DbgNode).getContext(); - - if (isSubprogram()) - return DIScopeRef(DISubprogram(DbgNode).getContext()); - - if (isLexicalBlock()) - return DIScopeRef(DILexicalBlock(DbgNode).getContext()); - - if (isLexicalBlockFile()) - return DIScopeRef(DILexicalBlockFile(DbgNode).getContext()); - - if (isNameSpace()) - return DIScopeRef(DINameSpace(DbgNode).getContext()); - - assert((isFile() || isCompileUnit()) && "Unhandled type of scope."); - return DIScopeRef(nullptr); -} - -StringRef DIScope::getName() const { - if (isType()) - return DIType(DbgNode).getName(); - if (isSubprogram()) - return DISubprogram(DbgNode).getName(); - if (isNameSpace()) - return DINameSpace(DbgNode).getName(); - assert((isLexicalBlock() || isLexicalBlockFile() || isFile() || - isCompileUnit()) && - "Unhandled type of scope."); - return StringRef(); -} - -StringRef DIScope::getFilename() const { - if (auto *N = get()) - return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 0); - return ""; -} - -StringRef DIScope::getDirectory() const { - if (auto *N = get()) - return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 1); - return ""; -} - -void DICompileUnit::replaceSubprograms(DIArray Subprograms) { - assert(Verify() && "Expected compile unit"); - get()->replaceSubprograms(cast_or_null<MDTuple>(Subprograms.get())); -} - -void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) { - assert(Verify() && "Expected compile unit"); - get()->replaceGlobalVariables(cast_or_null<MDTuple>(GlobalVariables.get())); -} - -DILocation DILocation::copyWithNewScope(LLVMContext &Ctx, - DILexicalBlockFile NewScope) { - assert(Verify()); - assert(NewScope && "Expected valid scope"); - - const auto *Old = cast<MDLocation>(DbgNode); - return DILocation(MDLocation::get(Ctx, Old->getLine(), Old->getColumn(), - NewScope, Old->getInlinedAt())); -} - -unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) { - std::pair<const char *, unsigned> Key(getFilename().data(), getLineNumber()); - return ++Ctx.pImpl->DiscriminatorTable[Key]; -} - -DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope, - LLVMContext &VMContext) { - assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - return cast<MDLocalVariable>(DV) - ->withInline(cast_or_null<MDLocation>(InlinedScope)); -} - -DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { - assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - return cast<MDLocalVariable>(DV)->withoutInline(); -} - DISubprogram llvm::getDISubprogram(const MDNode *Scope) { - DIDescriptor D(Scope); - if (D.isSubprogram()) - return DISubprogram(Scope); - - if (D.isLexicalBlockFile()) - return getDISubprogram(DILexicalBlockFile(Scope).getContext()); - - if (D.isLexicalBlock()) - return 
getDISubprogram(DILexicalBlock(Scope).getContext()); - - return DISubprogram(); + if (auto *LocalScope = dyn_cast_or_null<MDLocalScope>(Scope)) + return LocalScope->getSubprogram(); + return nullptr; } DISubprogram llvm::getDISubprogram(const Function *F) { // We look for the first instr that has a debug annotation leading back to F. for (auto &BB : *F) { auto Inst = std::find_if(BB.begin(), BB.end(), [](const Instruction &Inst) { - return !Inst.getDebugLoc().isUnknown(); + return Inst.getDebugLoc(); }); if (Inst == BB.end()) continue; DebugLoc DLoc = Inst->getDebugLoc(); - const MDNode *Scope = DLoc.getScopeNode(); + const MDNode *Scope = DLoc.getInlinedAtScope(); DISubprogram Subprogram = getDISubprogram(Scope); - return Subprogram.describes(F) ? Subprogram : DISubprogram(); + return Subprogram->describes(F) ? Subprogram : DISubprogram(); } return DISubprogram(); } DICompositeType llvm::getDICompositeType(DIType T) { - if (T.isCompositeType()) - return DICompositeType(T); + if (auto *C = dyn_cast_or_null<MDCompositeTypeBase>(T)) + return C; - if (T.isDerivedType()) { + if (auto *D = dyn_cast_or_null<MDDerivedTypeBase>(T)) { // This function is currently used by dragonegg and dragonegg does // not generate identifier for types, so using an empty map to resolve // DerivedFrom should be fine. DITypeIdentifierMap EmptyMap; - return getDICompositeType( - DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap)); + return getDICompositeType(D->getBaseType().resolve(EmptyMap)); } - return DICompositeType(); + return nullptr; } DITypeIdentifierMap llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) { DITypeIdentifierMap Map; for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) { - DICompileUnit CU(CU_Nodes->getOperand(CUi)); - DIArray Retain = CU.getRetainedTypes(); - for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) { - if (!Retain.getElement(Ti).isCompositeType()) + auto *CU = cast<MDCompileUnit>(CU_Nodes->getOperand(CUi)); + DIArray Retain = CU->getRetainedTypes(); + for (unsigned Ti = 0, Te = Retain.size(); Ti != Te; ++Ti) { + if (!isa<MDCompositeType>(Retain[Ti])) continue; - DICompositeType Ty(Retain.getElement(Ti)); - if (MDString *TypeId = Ty.getIdentifier()) { + auto *Ty = cast<MDCompositeType>(Retain[Ti]); + if (MDString *TypeId = Ty->getRawIdentifier()) { // Definition has priority over declaration. // Try to insert (TypeId, Ty) to Map. std::pair<DITypeIdentifierMap::iterator, bool> P = Map.insert(std::make_pair(TypeId, Ty)); // If TypeId already exists in Map and this is a definition, replace // whatever we had (declaration or definition) with the definition. 
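// getDISubprogram now leans on MDLocalScope::getSubprogram() (added in the
// DebugInfoMetadata.cpp hunk below), which walks lexical blocks up to the
// enclosing subprogram. Equivalent caller sketch, Scope hypothetical:
//
//   if (auto *LS = dyn_cast_or_null<MDLocalScope>(Scope))
//     MDSubprogram *SP = LS->getSubprogram();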
- if (!P.second && !Ty.isForwardDecl()) + if (!P.second && !Ty->isForwardDecl()) P.first->second = Ty; } } @@ -673,37 +123,28 @@ void DebugInfoFinder::processModule(const Module &M) { InitializeTypeMap(M); if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); addCompileUnit(CU); - DIArray GVs = CU.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); + for (DIGlobalVariable DIG : CU->getGlobalVariables()) { if (addGlobalVariable(DIG)) { - processScope(DIG.getContext()); - processType(DIG.getType().resolve(TypeIdentifierMap)); + processScope(DIG->getScope()); + processType(DIG->getType().resolve(TypeIdentifierMap)); } } - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - processSubprogram(DISubprogram(SPs.getElement(i))); - DIArray EnumTypes = CU.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - processType(DIType(EnumTypes.getElement(i))); - DIArray RetainedTypes = CU.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - processType(DIType(RetainedTypes.getElement(i))); - DIArray Imports = CU.getImportedEntities(); - for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) { - DIImportedEntity Import = DIImportedEntity(Imports.getElement(i)); - if (!Import) - continue; - DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap); - if (Entity.isType()) - processType(DIType(Entity)); - else if (Entity.isSubprogram()) - processSubprogram(DISubprogram(Entity)); - else if (Entity.isNameSpace()) - processScope(DINameSpace(Entity).getContext()); + for (auto *SP : CU->getSubprograms()) + processSubprogram(SP); + for (auto *ET : CU->getEnumTypes()) + processType(ET); + for (auto *RT : CU->getRetainedTypes()) + processType(RT); + for (DIImportedEntity Import : CU->getImportedEntities()) { + auto *Entity = Import->getEntity().resolve(TypeIdentifierMap); + if (auto *T = dyn_cast<MDType>(Entity)) + processType(T); + else if (auto *SP = dyn_cast<MDSubprogram>(Entity)) + processSubprogram(SP); + else if (auto *NS = dyn_cast<MDNamespace>(Entity)) + processScope(NS->getScope()); } } } @@ -713,79 +154,66 @@ void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) { if (!Loc) return; InitializeTypeMap(M); - processScope(Loc.getScope()); - processLocation(M, Loc.getOrigLocation()); + processScope(Loc->getScope()); + processLocation(M, Loc->getInlinedAt()); } void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) return; - processScope(DT.getContext().resolve(TypeIdentifierMap)); - if (DT.isCompositeType()) { - DICompositeType DCT(DT); - processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap)); - if (DT.isSubroutineType()) { - DITypeArray DTA = DISubroutineType(DT).getTypeArray(); - for (unsigned i = 0, e = DTA.getNumElements(); i != e; ++i) - processType(DTA.getElement(i).resolve(TypeIdentifierMap)); + processScope(DT->getScope().resolve(TypeIdentifierMap)); + if (auto *DCT = dyn_cast<MDCompositeTypeBase>(DT)) { + processType(DCT->getBaseType().resolve(TypeIdentifierMap)); + if (auto *ST = dyn_cast<MDSubroutineType>(DCT)) { + for (MDTypeRef Ref : ST->getTypeArray()) + processType(Ref.resolve(TypeIdentifierMap)); return; } - DIArray DA = DCT.getElements(); - for (unsigned i = 0, 
e = DA.getNumElements(); i != e; ++i) { - DIDescriptor D = DA.getElement(i); - if (D.isType()) - processType(DIType(D)); - else if (D.isSubprogram()) - processSubprogram(DISubprogram(D)); + for (Metadata *D : DCT->getElements()) { + if (auto *T = dyn_cast<MDType>(D)) + processType(T); + else if (DISubprogram SP = dyn_cast<MDSubprogram>(D)) + processSubprogram(SP); } - } else if (DT.isDerivedType()) { - DIDerivedType DDT(DT); - processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap)); + } else if (auto *DDT = dyn_cast<MDDerivedTypeBase>(DT)) { + processType(DDT->getBaseType().resolve(TypeIdentifierMap)); } } void DebugInfoFinder::processScope(DIScope Scope) { - if (Scope.isType()) { - DIType Ty(Scope); + if (!Scope) + return; + if (DIType Ty = dyn_cast<MDType>(Scope)) { processType(Ty); return; } - if (Scope.isCompileUnit()) { - addCompileUnit(DICompileUnit(Scope)); + if (DICompileUnit CU = dyn_cast<MDCompileUnit>(Scope)) { + addCompileUnit(CU); return; } - if (Scope.isSubprogram()) { - processSubprogram(DISubprogram(Scope)); + if (DISubprogram SP = dyn_cast<MDSubprogram>(Scope)) { + processSubprogram(SP); return; } if (!addScope(Scope)) return; - if (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope); - processScope(LB.getContext()); - } else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile LBF = DILexicalBlockFile(Scope); - processScope(LBF.getScope()); - } else if (Scope.isNameSpace()) { - DINameSpace NS(Scope); - processScope(NS.getContext()); + if (auto *LB = dyn_cast<MDLexicalBlockBase>(Scope)) { + processScope(LB->getScope()); + } else if (auto *NS = dyn_cast<MDNamespace>(Scope)) { + processScope(NS->getScope()); } } void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (!addSubprogram(SP)) return; - processScope(SP.getContext().resolve(TypeIdentifierMap)); - processType(SP.getType()); - DIArray TParams = SP.getTemplateParams(); - for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) { - DIDescriptor Element = TParams.getElement(I); - if (Element.isTemplateTypeParameter()) { - DITemplateTypeParameter TType(Element); - processType(TType.getType().resolve(TypeIdentifierMap)); - } else if (Element.isTemplateValueParameter()) { - DITemplateValueParameter TVal(Element); - processType(TVal.getType().resolve(TypeIdentifierMap)); + processScope(SP->getScope().resolve(TypeIdentifierMap)); + processType(SP->getType()); + for (auto *Element : SP->getTemplateParams()) { + if (auto *TType = dyn_cast<MDTemplateTypeParameter>(Element)) { + processType(TType->getType().resolve(TypeIdentifierMap)); + } else if (auto *TVal = dyn_cast<MDTemplateValueParameter>(Element)) { + processType(TVal->getType().resolve(TypeIdentifierMap)); } } } @@ -797,14 +225,14 @@ void DebugInfoFinder::processDeclare(const Module &M, return; InitializeTypeMap(M); - DIDescriptor DV(N); - if (!DV.isVariable()) + DIVariable DV = dyn_cast<MDLocalVariable>(N); + if (!DV) return; if (!NodesSeen.insert(DV).second) return; - processScope(DIVariable(N).getContext()); - processType(DIVariable(N).getType().resolve(TypeIdentifierMap)); + processScope(DV->getScope()); + processType(DV->getType().resolve(TypeIdentifierMap)); } void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) { @@ -813,14 +241,14 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) { return; InitializeTypeMap(M); - DIDescriptor DV(N); - if (!DV.isVariable()) + DIVariable DV = dyn_cast<MDLocalVariable>(N); + if (!DV) return; if (!NodesSeen.insert(DV).second) return; - 
processScope(DIVariable(N).getContext()); - processType(DIVariable(N).getType().resolve(TypeIdentifierMap)); + processScope(DV->getScope()); + processType(DV->getType().resolve(TypeIdentifierMap)); } bool DebugInfoFinder::addType(DIType DT) { @@ -879,76 +307,17 @@ bool DebugInfoFinder::addScope(DIScope Scope) { return true; } -//===----------------------------------------------------------------------===// -// DIDescriptor: dump routines for all descriptors. -//===----------------------------------------------------------------------===// - -void DIDescriptor::dump() const { - print(dbgs()); - dbgs() << '\n'; -} - -void DIDescriptor::print(raw_ostream &OS) const { - if (!get()) - return; - get()->print(OS); -} - -static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, - const LLVMContext &Ctx) { - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope."); - // Omit the directory, because it's likely to be long and uninteresting. - CommentOS << Scope.getFilename(); - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, CommentOS, Ctx); - CommentOS << " ]"; - } - } -} - -void DIVariable::printExtendedName(raw_ostream &OS) const { - const LLVMContext &Ctx = DbgNode->getContext(); - StringRef Res = getName(); - if (!Res.empty()) - OS << Res << "," << getLineNumber(); - if (MDNode *InlinedAt = getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { - OS << " @["; - printDebugLoc(InlinedAtDL, OS, Ctx); - OS << "]"; +bool llvm::stripDebugInfo(Function &F) { + bool Changed = false; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (I.getDebugLoc()) { + Changed = true; + I.setDebugLoc(DebugLoc()); + } } } -} - -template <> DIRef<DIDescriptor>::DIRef(const Metadata *V) : Val(V) { - assert(isDescriptorRef(V) && - "DIDescriptorRef should be a MDString or MDNode"); -} -template <> DIRef<DIScope>::DIRef(const Metadata *V) : Val(V) { - assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode"); -} -template <> DIRef<DIType>::DIRef(const Metadata *V) : Val(V) { - assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode"); -} - -template <> -DIDescriptorRef DIDescriptor::getFieldAs<DIDescriptorRef>(unsigned Elt) const { - return DIDescriptorRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); -} -template <> -DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const { - return DIScopeRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); -} -template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const { - return DITypeRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); + return Changed; } bool llvm::StripDebugInfo(Module &M) { @@ -984,16 +353,11 @@ bool llvm::StripDebugInfo(Module &M) { } } - for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) - for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; - ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) { - if (!BI->getDebugLoc().isUnknown()) { - Changed = true; - BI->setDebugLoc(DebugLoc()); - } - } + for (Function &F : M) + Changed |= stripDebugInfo(F); + + if (GVMaterializer *Materializer = M.getMaterializer()) + Materializer->setStripDebugInfo(); return Changed; } @@ 
-1014,11 +378,9 @@ llvm::makeSubprogramMap(const Module &M) { return R; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (Function *F = SP.getFunction()) + DICompileUnit CUNode = cast<MDCompileUnit>(N); + for (DISubprogram SP : CUNode->getSubprograms()) { + if (Function *F = SP->getFunction()) R.insert(std::make_pair(F, SP)); } } diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 89ec1bc..f6f2ff2 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -44,6 +45,7 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line, // Fixup column. adjustColumn(Column); + assert(Scope && "Expected scope"); if (Storage == Uniqued) { if (auto *N = getUniqued(Context.pImpl->MDLocations, @@ -64,6 +66,96 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line, Storage, Context.pImpl->MDLocations); } +unsigned MDLocation::computeNewDiscriminator() const { + // FIXME: This seems completely wrong. + // + // 1. If two modules are generated in the same context, then the second + // Module will get different discriminators than it would have if it were + // generated in its own context. + // 2. If this function is called after round-tripping to bitcode instead of + // before, it will give a different (and potentially incorrect!) return. + // + // The discriminator should instead be calculated from local information + // where it's actually needed. This logic should be moved to + // AddDiscriminators::runOnFunction(), where it doesn't pollute the + // LLVMContext. + std::pair<const char *, unsigned> Key(getFilename().data(), getLine()); + return ++getContext().pImpl->DiscriminatorTable[Key]; +} + +unsigned DebugNode::getFlag(StringRef Flag) { + return StringSwitch<unsigned>(Flag) +#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME) +#include "llvm/IR/DebugInfoFlags.def" + .Default(0); +} + +const char *DebugNode::getFlagString(unsigned Flag) { + switch (Flag) { + default: + return ""; +#define HANDLE_DI_FLAG(ID, NAME) \ + case Flag##NAME: \ + return "DIFlag" #NAME; +#include "llvm/IR/DebugInfoFlags.def" + } +} + +unsigned DebugNode::splitFlags(unsigned Flags, + SmallVectorImpl<unsigned> &SplitFlags) { + // Accessibility flags need to be specially handled, since they're packed + // together. 
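// StripDebugInfo(Module&) is now a thin loop over the new per-function
// helper, plus a hint to any attached GVMaterializer so lazily-loaded
// bodies are stripped too. The core of it, as added above:
//
//   bool Changed = false;
//   for (Function &F : M)
//     Changed |= stripDebugInfo(F);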
+ if (unsigned A = Flags & FlagAccessibility) { + if (A == FlagPrivate) + SplitFlags.push_back(FlagPrivate); + else if (A == FlagProtected) + SplitFlags.push_back(FlagProtected); + else + SplitFlags.push_back(FlagPublic); + Flags &= ~A; + } + +#define HANDLE_DI_FLAG(ID, NAME) \ + if (unsigned Bit = Flags & ID) { \ + SplitFlags.push_back(Bit); \ + Flags &= ~Bit; \ + } +#include "llvm/IR/DebugInfoFlags.def" + + return Flags; +} + +MDScopeRef MDScope::getScope() const { + if (auto *T = dyn_cast<MDType>(this)) + return T->getScope(); + + if (auto *SP = dyn_cast<MDSubprogram>(this)) + return SP->getScope(); + + if (auto *LB = dyn_cast<MDLexicalBlockBase>(this)) + return MDScopeRef(LB->getScope()); + + if (auto *NS = dyn_cast<MDNamespace>(this)) + return MDScopeRef(NS->getScope()); + + assert((isa<MDFile>(this) || isa<MDCompileUnit>(this)) && + "Unhandled type of scope."); + return nullptr; +} + +StringRef MDScope::getName() const { + if (auto *T = dyn_cast<MDType>(this)) + return T->getName(); + if (auto *SP = dyn_cast<MDSubprogram>(this)) + return SP->getName(); + if (auto *NS = dyn_cast<MDNamespace>(this)) + return NS->getName(); + assert((isa<MDLexicalBlockBase>(this) || isa<MDFile>(this) || + isa<MDCompileUnit>(this)) && + "Unhandled type of scope."); + return ""; +} + static StringRef getString(const MDString *S) { if (S) return S->getString(); @@ -237,6 +329,12 @@ MDCompileUnit *MDCompileUnit::getImpl( (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind), Ops); } +MDSubprogram *MDLocalScope::getSubprogram() const { + if (auto *Block = dyn_cast<MDLexicalBlockBase>(this)) + return Block->getScope()->getSubprogram(); + return const_cast<MDSubprogram *>(cast<MDSubprogram>(this)); +} + MDSubprogram *MDSubprogram::getImpl( LLVMContext &Context, Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, @@ -262,6 +360,21 @@ MDSubprogram *MDSubprogram::getImpl( Ops); } +Function *MDSubprogram::getFunction() const { + // FIXME: Should this be looking through bitcasts? + return dyn_cast_or_null<Function>(getFunctionConstant()); +} + +bool MDSubprogram::describes(const Function *F) const { + assert(F && "Invalid function"); + if (F == getFunction()) + return true; + StringRef Name = getLinkageName(); + if (Name.empty()) + Name = getName(); + return F->getName() == Name; +} + void MDSubprogram::replaceFunction(Function *F) { replaceFunction(F ? 
ConstantAsMetadata::get(F) : static_cast<ConstantAsMetadata *>(nullptr)); @@ -271,6 +384,7 @@ MDLexicalBlock *MDLexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *File, unsigned Line, unsigned Column, StorageType Storage, bool ShouldCreate) { + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(MDLexicalBlock, (Scope, File, Line, Column)); Metadata *Ops[] = {File, Scope}; DEFINE_GETIMPL_STORE(MDLexicalBlock, (Line, Column), Ops); @@ -281,6 +395,7 @@ MDLexicalBlockFile *MDLexicalBlockFile::getImpl(LLVMContext &Context, unsigned Discriminator, StorageType Storage, bool ShouldCreate) { + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(MDLexicalBlockFile, (Scope, File, Discriminator)); Metadata *Ops[] = {File, Scope}; DEFINE_GETIMPL_STORE(MDLexicalBlockFile, (Discriminator), Ops); @@ -335,20 +450,23 @@ MDGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Ops); } -MDLocalVariable *MDLocalVariable::getImpl( - LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags, - Metadata *InlinedAt, StorageType Storage, bool ShouldCreate) { +MDLocalVariable *MDLocalVariable::getImpl(LLVMContext &Context, unsigned Tag, + Metadata *Scope, MDString *Name, + Metadata *File, unsigned Line, + Metadata *Type, unsigned Arg, + unsigned Flags, StorageType Storage, + bool ShouldCreate) { // Truncate Arg to 8 bits. // // FIXME: This is gross (and should be changed to an assert or removed), but // it matches historical behaviour for now. Arg &= (1u << 8) - 1; + assert(Scope && "Expected scope"); assert(isCanonical(Name) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(MDLocalVariable, (Tag, Scope, getString(Name), File, - Line, Type, Arg, Flags, InlinedAt)); - Metadata *Ops[] = {Scope, Name, File, Type, InlinedAt}; + Line, Type, Arg, Flags)); + Metadata *Ops[] = {Scope, Name, File, Type}; DEFINE_GETIMPL_STORE(MDLocalVariable, (Tag, Line, Arg, Flags), Ops); } @@ -391,6 +509,24 @@ bool MDExpression::isValid() const { return true; } +bool MDExpression::isBitPiece() const { + assert(isValid() && "Expected valid expression"); + if (unsigned N = getNumElements()) + if (N >= 3) + return getElement(N - 3) == dwarf::DW_OP_bit_piece; + return false; +} + +uint64_t MDExpression::getBitPieceOffset() const { + assert(isBitPiece() && "Expected bit piece"); + return getElement(getNumElements() - 2); +} + +uint64_t MDExpression::getBitPieceSize() const { + assert(isBitPiece() && "Expected bit piece"); + return getElement(getNumElements() - 1); +} + MDObjCProperty *MDObjCProperty::getImpl( LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line, MDString *GetterName, MDString *SetterName, unsigned Attributes, diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index e1bf795..4cf7e9e 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -16,101 +16,87 @@ using namespace llvm; //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// +DebugLoc::DebugLoc(const MDLocation *L) : Loc(const_cast<MDLocation *>(L)) {} +DebugLoc::DebugLoc(const MDNode *L) : Loc(const_cast<MDNode *>(L)) {} -unsigned DebugLoc::getLine() const { return DILocation(Loc).getLineNumber(); } -unsigned DebugLoc::getCol() const { return DILocation(Loc).getColumnNumber(); } +MDLocation *DebugLoc::get() const { + return cast_or_null<MDLocation>(Loc.get()); +} 
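// DebugLoc is now a thin wrapper over MDLocation, so validity is a plain
// boolean test instead of isUnknown(). Typical caller sketch, assuming an
// Instruction &I:
//
//   if (DebugLoc DL = I.getDebugLoc()) {   // was: !DL.isUnknown()
//     unsigned Line = DL.getLine();        // asserts on a null DebugLoc
//     MDLocation *IA = DL.getInlinedAt();
//   }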
-MDNode *DebugLoc::getScope() const { return DILocation(Loc).getScope(); } +unsigned DebugLoc::getLine() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getLine(); +} -MDNode *DebugLoc::getInlinedAt() const { - return DILocation(Loc).getOrigLocation(); +unsigned DebugLoc::getCol() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getColumn(); } -/// Return both the Scope and the InlinedAt values. -void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA) const { - Scope = getScope(); - IA = getInlinedAt(); +MDNode *DebugLoc::getScope() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getScope(); } -MDNode *DebugLoc::getScopeNode() const { - if (MDNode *InlinedAt = getInlinedAt()) - return DebugLoc::getFromDILocation(InlinedAt).getScopeNode(); - return getScope(); +MDLocation *DebugLoc::getInlinedAt() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getInlinedAt(); +} + +MDNode *DebugLoc::getInlinedAtScope() const { + return cast<MDLocation>(Loc)->getInlinedAtScope(); } DebugLoc DebugLoc::getFnDebugLoc() const { - const MDNode *Scope = getScopeNode(); - DISubprogram SP = getDISubprogram(Scope); - if (SP.isSubprogram()) - return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + // FIXME: Add a method on \a MDLocation that does this work. + const MDNode *Scope = getInlinedAtScope(); + if (DISubprogram SP = getDISubprogram(Scope)) + return DebugLoc::get(SP->getScopeLine(), 0, SP); return DebugLoc(); } -DebugLoc DebugLoc::get(unsigned Line, unsigned Col, - MDNode *Scope, MDNode *InlinedAt) { +DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope, + const MDNode *InlinedAt) { // If no scope is available, this is an unknown location. if (!Scope) return DebugLoc(); - return getFromDILocation( - MDLocation::get(Scope->getContext(), Line, Col, Scope, InlinedAt)); -} - -/// getAsMDNode - This method converts the compressed DebugLoc node into a -/// DILocation-compatible MDNode. -MDNode *DebugLoc::getAsMDNode() const { return Loc; } - -/// getFromDILocation - Translate the DILocation quad into a DebugLoc. -DebugLoc DebugLoc::getFromDILocation(MDNode *N) { - DebugLoc Loc; - Loc.Loc.reset(N); - return Loc; -} - -/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc. -DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) { - DILexicalBlock LexBlock(N); - MDNode *Scope = LexBlock.getContext(); - if (!Scope) return DebugLoc(); - return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, - nullptr); + return MDLocation::get(Scope->getContext(), Line, Col, + const_cast<MDNode *>(Scope), + const_cast<MDNode *>(InlinedAt)); } void DebugLoc::dump() const { #ifndef NDEBUG - if (!isUnknown()) { - dbgs() << getLine(); - if (getCol() != 0) - dbgs() << ',' << getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt()); - if (!InlinedAtDL.isUnknown()) { - dbgs() << " @ "; - InlinedAtDL.dump(); - } else - dbgs() << "\n"; - } + if (!Loc) + return; + + dbgs() << getLine(); + if (getCol() != 0) + dbgs() << ',' << getCol(); + if (DebugLoc InlinedAtDL = DebugLoc(getInlinedAt())) { + dbgs() << " @ "; + InlinedAtDL.dump(); + } else + dbgs() << "\n"; #endif } void DebugLoc::print(raw_ostream &OS) const { - if (!isUnknown()) { - // Print source line info. 
- DIScope Scope(getScope()); - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - if (Scope) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << getLine(); - if (getCol() != 0) - OS << ':' << getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt()); - if (!InlinedAtDL.isUnknown()) { - OS << " @[ "; - InlinedAtDL.print(OS); - OS << " ]"; - } + if (!Loc) + return; + + // Print source line info. + auto *Scope = cast<MDScope>(getScope()); + OS << Scope->getFilename(); + OS << ':' << getLine(); + if (getCol() != 0) + OS << ':' << getCol(); + + if (DebugLoc InlinedAtDL = getInlinedAt()) { + OS << " @[ "; + InlinedAtDL.print(OS); + OS << " ]"; } } diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 5608589..91635f6 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -129,16 +129,16 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationBase::isLocationAvailable() const { - return !getDebugLoc().isUnknown(); + return getDebugLoc(); } void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename, unsigned *Line, unsigned *Column) const { - DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext())); - *Filename = DIL.getFilename(); - *Line = DIL.getLineNumber(); - *Column = DIL.getColumnNumber(); + MDLocation *L = getDebugLoc(); + *Filename = L->getFilename(); + *Line = L->getLine(); + *Column = L->getColumn(); } const std::string DiagnosticInfoOptimizationBase::getLocationStr() const { @@ -147,7 +147,7 @@ const std::string DiagnosticInfoOptimizationBase::getLocationStr() const { unsigned Column = 0; if (isLocationAvailable()) getLocation(&Filename, &Line, &Column); - return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); + return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); } void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const { diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 33e1526..d3a0934 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -215,9 +215,7 @@ LLVMContext &Function::getContext() const { return getType()->getContext(); } -FunctionType *Function::getFunctionType() const { - return cast<FunctionType>(getType()->getElementType()); -} +FunctionType *Function::getFunctionType() const { return Ty; } bool Function::isVarArg() const { return getFunctionType()->isVarArg(); @@ -242,7 +240,8 @@ void Function::eraseFromParent() { Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0, - Linkage, name) { + Linkage, name), + Ty(Ty) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); setIsMaterializable(false); @@ -349,6 +348,12 @@ void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + // Maintain the GC name for each function in an on-the-side table. 
This saves // allocating an additional word in Function for programs which do not use GC // (i.e., most programs) at the cost of increased overhead for clients which do diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 7010ceb..6ed5891 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -555,7 +555,7 @@ FileInfo::openCoveragePath(StringRef CoveragePath) { return llvm::make_unique<raw_null_ostream>(); std::error_code EC; - auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str(), EC, + auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::F_Text); if (EC) { errs() << EC.message() << "\n"; diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 90303b2..06f54c7 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -23,7 +23,8 @@ using namespace llvm; /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. -Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { +GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, + const Twine &Name) { Constant *StrConstant = ConstantDataArray::getString(Context, Str); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 91ccfbb..c1ac336 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -21,11 +21,14 @@ using namespace llvm; PrintModulePass::PrintModulePass() : OS(dbgs()) {} -PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner) - : OS(OS), Banner(Banner) {} +PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, + bool ShouldPreserveUseListOrder) + : OS(OS), Banner(Banner), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} PreservedAnalyses PrintModulePass::run(Module &M) { - OS << Banner << M; + OS << Banner; + M.print(OS, nullptr, ShouldPreserveUseListOrder); return PreservedAnalyses::all(); } @@ -46,8 +49,9 @@ class PrintModulePassWrapper : public ModulePass { public: static char ID; PrintModulePassWrapper() : ModulePass(ID) {} - PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner) - : ModulePass(ID), P(OS, Banner) {} + PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner, + bool ShouldPreserveUseListOrder) + : ModulePass(ID), P(OS, Banner, ShouldPreserveUseListOrder) {} bool runOnModule(Module &M) override { P.run(M); @@ -112,8 +116,9 @@ INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", "Print BB to stderr", false, false) ModulePass *llvm::createPrintModulePass(llvm::raw_ostream &OS, - const std::string &Banner) { - return new PrintModulePassWrapper(OS, Banner); + const std::string &Banner, + bool ShouldPreserveUseListOrder) { + return new PrintModulePassWrapper(OS, Banner, ShouldPreserveUseListOrder); } FunctionPass *llvm::createPrintFunctionPass(llvm::raw_ostream &OS, diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index b456d9f..aa9e027 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -167,7 +167,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // Note that operand #n has a matching input. scInfo.MatchingInput = ConstraintsSoFar.size(); } else { - if (ConstraintsSoFar[N].hasMatchingInput()) + if (ConstraintsSoFar[N].hasMatchingInput() && + (size_t)ConstraintsSoFar[N].MatchingInput != + ConstraintsSoFar.size()) return true; // Note that operand #n has a matching input. 
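// The new ShouldPreserveUseListOrder flag threads through to
// Module::print; a hedged usage sketch for the updated factory:
//
//   ModulePass *P =
//       createPrintModulePass(outs(), "", /*ShouldPreserveUseListOrder=*/true);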
ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size(); diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 7d9bd7e..57c143c 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,8 +62,8 @@ void Instruction::removeFromParent() { getParent()->getInstList().remove(this); } -void Instruction::eraseFromParent() { - getParent()->getInstList().erase(this); +iplist<Instruction>::iterator Instruction::eraseFromParent() { + return getParent()->getInstList().erase(this); } /// insertBefore - Insert an unlinked instructions into a basic block diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index af2aeb9..85b7521 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -352,6 +352,12 @@ void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const { if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) return true; @@ -617,6 +623,12 @@ void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void InvokeInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + LandingPadInst *InvokeInst::getLandingPadInst() const { return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI()); } @@ -943,12 +955,10 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) { } -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, - Instruction *InsertBef) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertBef) { + SynchronizationScope SynchScope, Instruction *InsertBef) + : UnaryInstruction(Ty, Load, Ptr, InsertBef) { setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SynchScope); @@ -1258,11 +1268,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) /// pointer type. /// template <typename IndexTy> -static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { - PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType()); - if (!PTy) return nullptr; // Type isn't a pointer type! - Type *Agg = PTy->getElementType(); - +static Type *getIndexedTypeInternal(Type *Agg, ArrayRef<IndexTy> IdxList) { // Handle the special case of the empty set index set, which is always valid. if (IdxList.empty()) return Agg; @@ -1283,17 +1289,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { return CurIdx == IdxList.size() ? 
Agg : nullptr; } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Value *> IdxList) { + return getIndexedTypeInternal(Ty, IdxList); } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Constant *> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); + return getIndexedTypeInternal(Ty, IdxList); } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList) { + return getIndexedTypeInternal(Ty, IdxList); } /// hasAllZeroIndices - Return true if all of the indices of this GEP are diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index e380665..c096a83 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -275,15 +275,17 @@ template <> struct MDNodeKeyImpl<GenericDebugNode> : MDNodeOpsKey { template <> struct MDNodeKeyImpl<MDSubrange> { int64_t Count; - int64_t Lo; + int64_t LowerBound; - MDNodeKeyImpl(int64_t Count, int64_t Lo) : Count(Count), Lo(Lo) {} - MDNodeKeyImpl(const MDSubrange *N) : Count(N->getCount()), Lo(N->getLo()) {} + MDNodeKeyImpl(int64_t Count, int64_t LowerBound) + : Count(Count), LowerBound(LowerBound) {} + MDNodeKeyImpl(const MDSubrange *N) + : Count(N->getCount()), LowerBound(N->getLowerBound()) {} bool isKeyOf(const MDSubrange *RHS) const { - return Count == RHS->getCount() && Lo == RHS->getLo(); + return Count == RHS->getCount() && LowerBound == RHS->getLowerBound(); } - unsigned getHashValue() const { return hash_combine(Count, Lo); } + unsigned getHashValue() const { return hash_combine(Count, LowerBound); } }; template <> struct MDNodeKeyImpl<MDEnumerator> { @@ -347,20 +349,20 @@ template <> struct MDNodeKeyImpl<MDDerivedType> { BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits), OffsetInBits(OffsetInBits), Flags(Flags), ExtraData(ExtraData) {} MDNodeKeyImpl(const MDDerivedType *N) - : Tag(N->getTag()), Name(N->getName()), File(N->getFile()), - Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()), - SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()), - OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()), - ExtraData(N->getExtraData()) {} + : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()), + Line(N->getLine()), Scope(N->getRawScope()), + BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), + AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()), + Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {} bool isKeyOf(const MDDerivedType *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - File == RHS->getFile() && Line == RHS->getLine() && - Scope == RHS->getScope() && BaseType == RHS->getBaseType() && + File == RHS->getRawFile() && Line == RHS->getLine() && + Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && SizeInBits == RHS->getSizeInBits() && AlignInBits == RHS->getAlignInBits() && OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() && - ExtraData == RHS->getExtraData(); + ExtraData == RHS->getRawExtraData(); } unsigned getHashValue() const { return hash_combine(Tag, Name, File, Line, Scope, BaseType, SizeInBits, @@ -397,26 +399,26 @@ template <> struct MDNodeKeyImpl<MDCompositeType> { RuntimeLang(RuntimeLang), VTableHolder(VTableHolder), 
TemplateParams(TemplateParams), Identifier(Identifier) {} MDNodeKeyImpl(const MDCompositeType *N) - : Tag(N->getTag()), Name(N->getName()), File(N->getFile()), - Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()), - SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()), - OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()), - Elements(N->getElements()), RuntimeLang(N->getRuntimeLang()), - VTableHolder(N->getVTableHolder()), - TemplateParams(N->getTemplateParams()), Identifier(N->getIdentifier()) { - } + : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()), + Line(N->getLine()), Scope(N->getRawScope()), + BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), + AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()), + Flags(N->getFlags()), Elements(N->getRawElements()), + RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()), + TemplateParams(N->getRawTemplateParams()), + Identifier(N->getIdentifier()) {} bool isKeyOf(const MDCompositeType *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - File == RHS->getFile() && Line == RHS->getLine() && - Scope == RHS->getScope() && BaseType == RHS->getBaseType() && + File == RHS->getRawFile() && Line == RHS->getLine() && + Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && SizeInBits == RHS->getSizeInBits() && AlignInBits == RHS->getAlignInBits() && OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() && - Elements == RHS->getElements() && + Elements == RHS->getRawElements() && RuntimeLang == RHS->getRuntimeLang() && - VTableHolder == RHS->getVTableHolder() && - TemplateParams == RHS->getTemplateParams() && + VTableHolder == RHS->getRawVTableHolder() && + TemplateParams == RHS->getRawTemplateParams() && Identifier == RHS->getIdentifier(); } unsigned getHashValue() const { @@ -433,10 +435,10 @@ template <> struct MDNodeKeyImpl<MDSubroutineType> { MDNodeKeyImpl(int64_t Flags, Metadata *TypeArray) : Flags(Flags), TypeArray(TypeArray) {} MDNodeKeyImpl(const MDSubroutineType *N) - : Flags(N->getFlags()), TypeArray(N->getTypeArray()) {} + : Flags(N->getFlags()), TypeArray(N->getRawTypeArray()) {} bool isKeyOf(const MDSubroutineType *RHS) const { - return Flags == RHS->getFlags() && TypeArray == RHS->getTypeArray(); + return Flags == RHS->getFlags() && TypeArray == RHS->getRawTypeArray(); } unsigned getHashValue() const { return hash_combine(Flags, TypeArray); } }; @@ -484,27 +486,28 @@ template <> struct MDNodeKeyImpl<MDCompileUnit> { Subprograms(Subprograms), GlobalVariables(GlobalVariables), ImportedEntities(ImportedEntities) {} MDNodeKeyImpl(const MDCompileUnit *N) - : SourceLanguage(N->getSourceLanguage()), File(N->getFile()), + : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()), Producer(N->getProducer()), IsOptimized(N->isOptimized()), Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()), SplitDebugFilename(N->getSplitDebugFilename()), - EmissionKind(N->getEmissionKind()), EnumTypes(N->getEnumTypes()), - RetainedTypes(N->getRetainedTypes()), Subprograms(N->getSubprograms()), - GlobalVariables(N->getGlobalVariables()), - ImportedEntities(N->getImportedEntities()) {} + EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()), + RetainedTypes(N->getRawRetainedTypes()), + Subprograms(N->getRawSubprograms()), + GlobalVariables(N->getRawGlobalVariables()), + ImportedEntities(N->getRawImportedEntities()) {} bool isKeyOf(const MDCompileUnit *RHS) const { return SourceLanguage == 
RHS->getSourceLanguage() && - File == RHS->getFile() && Producer == RHS->getProducer() && + File == RHS->getRawFile() && Producer == RHS->getProducer() && IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() && RuntimeVersion == RHS->getRuntimeVersion() && SplitDebugFilename == RHS->getSplitDebugFilename() && EmissionKind == RHS->getEmissionKind() && - EnumTypes == RHS->getEnumTypes() && - RetainedTypes == RHS->getRetainedTypes() && - Subprograms == RHS->getSubprograms() && - GlobalVariables == RHS->getGlobalVariables() && - ImportedEntities == RHS->getImportedEntities(); + EnumTypes == RHS->getRawEnumTypes() && + RetainedTypes == RHS->getRawRetainedTypes() && + Subprograms == RHS->getRawSubprograms() && + GlobalVariables == RHS->getRawGlobalVariables() && + ImportedEntities == RHS->getRawImportedEntities(); } unsigned getHashValue() const { return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags, @@ -549,31 +552,32 @@ template <> struct MDNodeKeyImpl<MDSubprogram> { Function(Function), TemplateParams(TemplateParams), Declaration(Declaration), Variables(Variables) {} MDNodeKeyImpl(const MDSubprogram *N) - : Scope(N->getScope()), Name(N->getName()), - LinkageName(N->getLinkageName()), File(N->getFile()), - Line(N->getLine()), Type(N->getType()), + : Scope(N->getRawScope()), Name(N->getName()), + LinkageName(N->getLinkageName()), File(N->getRawFile()), + Line(N->getLine()), Type(N->getRawType()), IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - ScopeLine(N->getScopeLine()), ContainingType(N->getContainingType()), + ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()), Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), - Function(N->getFunction()), TemplateParams(N->getTemplateParams()), - Declaration(N->getDeclaration()), Variables(N->getVariables()) {} + Function(N->getRawFunction()), + TemplateParams(N->getRawTemplateParams()), + Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} bool isKeyOf(const MDSubprogram *RHS) const { - return Scope == RHS->getScope() && Name == RHS->getName() && - LinkageName == RHS->getLinkageName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && + return Scope == RHS->getRawScope() && Name == RHS->getName() && + LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() && IsDefinition == RHS->isDefinition() && ScopeLine == RHS->getScopeLine() && - ContainingType == RHS->getContainingType() && + ContainingType == RHS->getRawContainingType() && Virtuality == RHS->getVirtuality() && VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() && - Function == RHS->getFunction() && - TemplateParams == RHS->getTemplateParams() && - Declaration == RHS->getDeclaration() && - Variables == RHS->getVariables(); + Function == RHS->getRawFunction() && + TemplateParams == RHS->getRawTemplateParams() && + Declaration == RHS->getRawDeclaration() && + Variables == RHS->getRawVariables(); } unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, @@ -592,11 +596,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlock> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Line, unsigned Column) : Scope(Scope), File(File), Line(Line), Column(Column) {} MDNodeKeyImpl(const MDLexicalBlock *N) - : 
Scope(N->getScope()), File(N->getFile()), Line(N->getLine()), + : Scope(N->getRawScope()), File(N->getRawFile()), Line(N->getLine()), Column(N->getColumn()) {} bool isKeyOf(const MDLexicalBlock *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Line == RHS->getLine() && Column == RHS->getColumn(); } unsigned getHashValue() const { @@ -612,11 +616,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlockFile> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Discriminator) : Scope(Scope), File(File), Discriminator(Discriminator) {} MDNodeKeyImpl(const MDLexicalBlockFile *N) - : Scope(N->getScope()), File(N->getFile()), + : Scope(N->getRawScope()), File(N->getRawFile()), Discriminator(N->getDiscriminator()) {} bool isKeyOf(const MDLexicalBlockFile *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Discriminator == RHS->getDiscriminator(); } unsigned getHashValue() const { @@ -633,11 +637,11 @@ template <> struct MDNodeKeyImpl<MDNamespace> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, StringRef Name, unsigned Line) : Scope(Scope), File(File), Name(Name), Line(Line) {} MDNodeKeyImpl(const MDNamespace *N) - : Scope(N->getScope()), File(N->getFile()), Name(N->getName()), + : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getName()), Line(N->getLine()) {} bool isKeyOf(const MDNamespace *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Name == RHS->getName() && Line == RHS->getLine(); } unsigned getHashValue() const { @@ -651,10 +655,10 @@ template <> struct MDNodeKeyImpl<MDTemplateTypeParameter> { MDNodeKeyImpl(StringRef Name, Metadata *Type) : Name(Name), Type(Type) {} MDNodeKeyImpl(const MDTemplateTypeParameter *N) - : Name(N->getName()), Type(N->getType()) {} + : Name(N->getName()), Type(N->getRawType()) {} bool isKeyOf(const MDTemplateTypeParameter *RHS) const { - return Name == RHS->getName() && Type == RHS->getType(); + return Name == RHS->getName() && Type == RHS->getRawType(); } unsigned getHashValue() const { return hash_combine(Name, Type); } }; @@ -668,12 +672,12 @@ template <> struct MDNodeKeyImpl<MDTemplateValueParameter> { MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *Type, Metadata *Value) : Tag(Tag), Name(Name), Type(Type), Value(Value) {} MDNodeKeyImpl(const MDTemplateValueParameter *N) - : Tag(N->getTag()), Name(N->getName()), Type(N->getType()), + : Tag(N->getTag()), Name(N->getName()), Type(N->getRawType()), Value(N->getValue()) {} bool isKeyOf(const MDTemplateValueParameter *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - Type == RHS->getType() && Value == RHS->getValue(); + Type == RHS->getRawType() && Value == RHS->getValue(); } unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); } }; @@ -699,21 +703,22 @@ template <> struct MDNodeKeyImpl<MDGlobalVariable> { IsDefinition(IsDefinition), Variable(Variable), StaticDataMemberDeclaration(StaticDataMemberDeclaration) {} MDNodeKeyImpl(const MDGlobalVariable *N) - : Scope(N->getScope()), Name(N->getName()), - LinkageName(N->getLinkageName()), File(N->getFile()), - Line(N->getLine()), Type(N->getType()), + : Scope(N->getRawScope()), Name(N->getName()), + LinkageName(N->getLinkageName()), File(N->getRawFile()), + Line(N->getLine()), Type(N->getRawType()), 
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - Variable(N->getVariable()), - StaticDataMemberDeclaration(N->getStaticDataMemberDeclaration()) {} + Variable(N->getRawVariable()), + StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()) {} bool isKeyOf(const MDGlobalVariable *RHS) const { - return Scope == RHS->getScope() && Name == RHS->getName() && - LinkageName == RHS->getLinkageName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && + return Scope == RHS->getRawScope() && Name == RHS->getName() && + LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() && IsDefinition == RHS->isDefinition() && - Variable == RHS->getVariable() && - StaticDataMemberDeclaration == RHS->getStaticDataMemberDeclaration(); + Variable == RHS->getRawVariable() && + StaticDataMemberDeclaration == + RHS->getRawStaticDataMemberDeclaration(); } unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, @@ -731,28 +736,24 @@ template <> struct MDNodeKeyImpl<MDLocalVariable> { Metadata *Type; unsigned Arg; unsigned Flags; - Metadata *InlinedAt; MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File, - unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags, - Metadata *InlinedAt) + unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags) : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), - Arg(Arg), Flags(Flags), InlinedAt(InlinedAt) {} + Arg(Arg), Flags(Flags) {} MDNodeKeyImpl(const MDLocalVariable *N) - : Tag(N->getTag()), Scope(N->getScope()), Name(N->getName()), - File(N->getFile()), Line(N->getLine()), Type(N->getType()), - Arg(N->getArg()), Flags(N->getFlags()), InlinedAt(N->getInlinedAt()) {} + : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()), + File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()), + Arg(N->getArg()), Flags(N->getFlags()) {} bool isKeyOf(const MDLocalVariable *RHS) const { - return Tag == RHS->getTag() && Scope == RHS->getScope() && - Name == RHS->getName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && - Arg == RHS->getArg() && Flags == RHS->getFlags() && - InlinedAt == RHS->getInlinedAt(); + return Tag == RHS->getTag() && Scope == RHS->getRawScope() && + Name == RHS->getName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && + Arg == RHS->getArg() && Flags == RHS->getFlags(); } unsigned getHashValue() const { - return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags, - InlinedAt); + return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags); } }; @@ -785,15 +786,15 @@ template <> struct MDNodeKeyImpl<MDObjCProperty> { : Name(Name), File(File), Line(Line), GetterName(GetterName), SetterName(SetterName), Attributes(Attributes), Type(Type) {} MDNodeKeyImpl(const MDObjCProperty *N) - : Name(N->getName()), File(N->getFile()), Line(N->getLine()), + : Name(N->getName()), File(N->getRawFile()), Line(N->getLine()), GetterName(N->getGetterName()), SetterName(N->getSetterName()), - Attributes(N->getAttributes()), Type(N->getType()) {} + Attributes(N->getAttributes()), Type(N->getRawType()) {} bool isKeyOf(const MDObjCProperty *RHS) const { - return Name == RHS->getName() && File == RHS->getFile() && + return Name == RHS->getName() && File == RHS->getRawFile() && Line == RHS->getLine() && GetterName == 
RHS->getGetterName() && SetterName == RHS->getSetterName() && - Attributes == RHS->getAttributes() && Type == RHS->getType(); + Attributes == RHS->getAttributes() && Type == RHS->getRawType(); } unsigned getHashValue() const { return hash_combine(Name, File, Line, GetterName, SetterName, Attributes, @@ -812,12 +813,12 @@ template <> struct MDNodeKeyImpl<MDImportedEntity> { StringRef Name) : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {} MDNodeKeyImpl(const MDImportedEntity *N) - : Tag(N->getTag()), Scope(N->getScope()), Entity(N->getEntity()), + : Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()), Line(N->getLine()), Name(N->getName()) {} bool isKeyOf(const MDImportedEntity *RHS) const { - return Tag == RHS->getTag() && Scope == RHS->getScope() && - Entity == RHS->getEntity() && Line == RHS->getLine() && + return Tag == RHS->getTag() && Scope == RHS->getRawScope() && + Entity == RHS->getRawEntity() && Line == RHS->getLine() && Name == RHS->getName(); } unsigned getHashValue() const { diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 9a365d1..6870032 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -293,7 +293,7 @@ public: Pass(PT_PassManager, ID), PMDataManager() { } // Delete on the fly managers. - virtual ~MPPassManager() { + ~MPPassManager() override { for (std::map<Pass *, FunctionPassManagerImpl *>::iterator I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); I != E; ++I) { diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 0ad3c5c..93098b9 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -446,6 +446,10 @@ void MDNode::makeUniqued() { assert(isTemporary() && "Expected this to be temporary"); assert(!isResolved() && "Expected this to be unresolved"); + // Enable uniquing callbacks. + for (auto &Op : mutable_operands()) + Op.reset(Op.get(), this); + // Make this 'uniqued'. Storage = Uniqued; if (!countUnresolvedOperands()) @@ -1035,7 +1039,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { // Handle 'dbg' as a special case since it is not stored in the hash table. if (KindID == LLVMContext::MD_dbg) { - DbgLoc = DebugLoc::getFromDILocation(Node); + DbgLoc = DebugLoc(Node); return; } @@ -1114,7 +1118,7 @@ void Instruction::getAllMetadataImpl( Result.clear(); // Handle 'dbg' as a special case since it is not stored in the hash table. - if (!DbgLoc.isUnknown()) { + if (DbgLoc) { Result.push_back( std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode())); if (!hasMetadataHashEntry()) return; diff --git a/lib/IR/UseListOrder.cpp b/lib/IR/UseListOrder.cpp deleted file mode 100644 index d064e67..0000000 --- a/lib/IR/UseListOrder.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===- UseListOrder.cpp - Implement Use List Order ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implement structures and command-line options for preserving use-list order. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/UseListOrder.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-use-list-order", - cl::desc("Experimental support to preserve bitcode use-list order."), - cl::init(false), cl::Hidden); - -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-use-list-order", - cl::desc("Experimental support to preserve assembly use-list order."), - cl::init(false), cl::Hidden); - -bool llvm::shouldPreserveBitcodeUseListOrder() { - return PreserveBitcodeUseListOrder; -} - -bool llvm::shouldPreserveAssemblyUseListOrder() { - return PreserveAssemblyUseListOrder; -} - -void llvm::setPreserveBitcodeUseListOrder(bool ShouldPreserve) { - PreserveBitcodeUseListOrder = ShouldPreserve; -} - -void llvm::setPreserveAssemblyUseListOrder(bool ShouldPreserve) { - PreserveAssemblyUseListOrder = ShouldPreserve; -} diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 78bfca4..f6eb427 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -525,7 +525,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, // Return values from call sites specifically marked as dereferenceable are // also okay. - if (ImmutableCallSite CS = V) { + if (auto CS = ImmutableCallSite(V)) { if (uint64_t Bytes = CS.getDereferenceableBytes(0)) { Type *Ty = V->getType()->getPointerElementType(); if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes) @@ -595,7 +595,7 @@ bool Value::isDereferenceablePointer(const DataLayout &DL) const { APInt DerefBytes(Offset.getBitWidth(), 0); if (const Argument *A = dyn_cast<Argument>(BV)) DerefBytes = A->getDereferenceableBytes(); - else if (ImmutableCallSite CS = BV) + else if (auto CS = ImmutableCallSite(BV)) DerefBytes = CS.getDereferenceableBytes(0); if (DerefBytes.getBoolValue() && Offset.isNonNegative()) { diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index fcf48c4..fba78e9 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -87,10 +87,9 @@ struct VerifierSupport { /// \brief Track the brokenness of the module while recursively visiting. bool Broken; - bool EverBroken; explicit VerifierSupport(raw_ostream &OS) - : OS(OS), M(nullptr), Broken(false), EverBroken(false) {} + : OS(OS), M(nullptr), Broken(false) {} private: void Write(const Value *V) { @@ -111,6 +110,10 @@ private: OS << '\n'; } + template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) { + Write(MD.get()); + } + void Write(const NamedMDNode *NMD) { if (!NMD) return; @@ -145,7 +148,7 @@ public: /// something is not correct. void CheckFailed(const Twine &Message) { OS << Message << '\n'; - EverBroken = Broken = true; + Broken = true; } /// \brief A check failed (with values to print). @@ -175,6 +178,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// \brief Keep track of the metadata nodes that have been checked already. SmallPtrSet<const Metadata *, 32> MDNodes; + /// \brief Track unresolved string-based type references. + SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs; + /// \brief The personality function referenced by the LandingPadInsts. /// All LandingPadInsts within the same function must use the same /// personality function. @@ -268,8 +274,8 @@ public: visitModuleFlags(M); visitModuleIdents(M); - // Verify debug info last. - verifyDebugInfo(); + // Verify type references last.
+ verifyTypeRefs(); return !Broken; } @@ -296,8 +302,37 @@ private: void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty); + template <class Ty> bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); #include "llvm/IR/Metadata.def" + void visitMDScope(const MDScope &N); + void visitMDDerivedTypeBase(const MDDerivedTypeBase &N); + void visitMDVariable(const MDVariable &N); + void visitMDLexicalBlockBase(const MDLexicalBlockBase &N); + void visitMDTemplateParameter(const MDTemplateParameter &N); + + void visitTemplateParams(const MDNode &N, const Metadata &RawParams); + + /// \brief Check for a valid string-based type reference. + /// + /// Checks if \c MD is a string-based type reference. If it is, keeps track + /// of it (and its user, \c N) for error messages later. + bool isValidUUID(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid type reference. + /// + /// Checks for subclasses of \a MDType, or \a isValidUUID(). + bool isTypeRef(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid scope reference. + /// + /// Checks for subclasses of \a MDScope, or \a isValidUUID(). + bool isScopeRef(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid debug info reference. + /// + /// Checks for subclasses of \a DebugNode, or \a isValidUUID(). + bool isDIRef(const MDNode &N, const Metadata *MD); // InstVisitor overrides... using InstVisitor<Verifier>::visit; @@ -371,9 +406,11 @@ private: void verifyFrameRecoverIndices(); // Module-level debug info verification... - void verifyDebugInfo(); - void processInstructions(DebugInfoFinder &Finder); - void processCallInst(DebugInfoFinder &Finder, const CallInst &CI); + void verifyTypeRefs(); + template <class MapTy> + void verifyBitPieceExpression(const DbgInfoIntrinsic &I, + const MapTy &TypeRefs); + void visitUnresolvedTypeRef(const MDString *S, const MDNode *N); }; } // End anonymous namespace @@ -566,13 +603,14 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) { MDNode *MD = NMD.getOperand(i); - if (!MD) - continue; if (NMD.getName() == "llvm.dbg.cu") { - Assert(isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD); + Assert(MD && isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD); } + if (!MD) + continue; + visitMDNode(*MD); } } @@ -658,6 +696,58 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { visitValueAsMetadata(*V, F); } +bool Verifier::isValidUUID(const MDNode &N, const Metadata *MD) { + auto *S = dyn_cast<MDString>(MD); + if (!S) + return false; + if (S->getString().empty()) + return false; + + // Keep track of names of types referenced via UUID so we can check that they + // actually exist. + UnresolvedTypeRefs.insert(std::make_pair(S, &N)); + return true; +} + +/// \brief Check if a value can be a reference to a type. +bool Verifier::isTypeRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<MDType>(MD); +} + +/// \brief Check if a value can be a ScopeRef. +bool Verifier::isScopeRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<MDScope>(MD); +} + +/// \brief Check if a value can be a debug info ref. 
+bool Verifier::isDIRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<DebugNode>(MD); +} + +template <class Ty> +bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) { + for (Metadata *MD : N.operands()) { + if (MD) { + if (!isa<Ty>(MD)) + return false; + } else { + if (!AllowNull) + return false; + } + } + return true; +} + +template <class Ty> +bool isValidMetadataArray(const MDTuple &N) { + return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false); +} + +template <class Ty> +bool isValidMetadataNullArray(const MDTuple &N) { + return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true); +} + void Verifier::visitMDLocation(const MDLocation &N) { Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), "location requires a valid scope", &N, N.getRawScope()); @@ -669,8 +759,14 @@ void Verifier::visitGenericDebugNode(const GenericDebugNode &N) { Assert(N.getTag(), "invalid tag", &N); } +void Verifier::visitMDScope(const MDScope &N) { + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); +} + void Verifier::visitMDSubrange(const MDSubrange &N) { Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); + Assert(N.getCount() >= -1, "invalid subrange count", &N); } void Verifier::visitMDEnumerator(const MDEnumerator &N) { @@ -683,7 +779,39 @@ void Verifier::visitMDBasicType(const MDBasicType &N) { "invalid tag", &N); } +void Verifier::visitMDDerivedTypeBase(const MDDerivedTypeBase &N) { + // Common scope checks. + visitMDScope(N); + + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); + + // FIXME: Sink this into the subclass verifies. + if (!N.getFile() || N.getFile()->getFilename().empty()) { + // Check whether the filename is allowed to be empty. + uint16_t Tag = N.getTag(); + Assert( + Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_ptr_to_member_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_restrict_type || + Tag == dwarf::DW_TAG_array_type || + Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_subroutine_type || + Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef, + "derived/composite type requires a filename", &N, N.getFile()); + } +} + void Verifier::visitMDDerivedType(const MDDerivedType &N) { + // Common derived type checks. 
+ visitMDDerivedTypeBase(N); + Assert(N.getTag() == dwarf::DW_TAG_typedef || N.getTag() == dwarf::DW_TAG_pointer_type || N.getTag() == dwarf::DW_TAG_ptr_to_member_type || @@ -696,9 +824,30 @@ void Verifier::visitMDDerivedType(const MDDerivedType &N) { N.getTag() == dwarf::DW_TAG_inheritance || N.getTag() == dwarf::DW_TAG_friend, "invalid tag", &N); + if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) { + Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N, + N.getExtraData()); + } +} + +static bool hasConflictingReferenceFlags(unsigned Flags) { + return (Flags & DebugNode::FlagLValueReference) && + (Flags & DebugNode::FlagRValueReference); +} + +void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) { + auto *Params = dyn_cast<MDTuple>(&RawParams); + Assert(Params, "invalid template params", &N, &RawParams); + for (Metadata *Op : Params->operands()) { + Assert(Op && isa<MDTemplateParameter>(Op), "invalid template parameter", &N, + Params, Op); + } } void Verifier::visitMDCompositeType(const MDCompositeType &N) { + // Common derived type checks. + visitMDDerivedTypeBase(N); + Assert(N.getTag() == dwarf::DW_TAG_array_type || N.getTag() == dwarf::DW_TAG_structure_type || N.getTag() == dwarf::DW_TAG_union_type || @@ -706,10 +855,29 @@ void Verifier::visitMDCompositeType(const MDCompositeType &N) { N.getTag() == dwarf::DW_TAG_subroutine_type || N.getTag() == dwarf::DW_TAG_class_type, "invalid tag", &N); + + Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()), + "invalid composite elements", &N, N.getRawElements()); + Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N, + N.getRawVTableHolder()); + Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()), + "invalid composite elements", &N, N.getRawElements()); + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); + if (auto *Params = N.getRawTemplateParams()) + visitTemplateParams(N, *Params); } void Verifier::visitMDSubroutineType(const MDSubroutineType &N) { Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N); + if (auto *Types = N.getRawTypeArray()) { + Assert(isa<MDTuple>(Types), "invalid composite elements", &N, Types); + for (Metadata *Ty : N.getTypeArray()->operands()) { + Assert(isTypeRef(N, Ty), "invalid subroutine type ref", &N, Types, Ty); + } + } + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); } void Verifier::visitMDFile(const MDFile &N) { @@ -718,45 +886,195 @@ void Verifier::visitMDFile(const MDFile &N) { void Verifier::visitMDCompileUnit(const MDCompileUnit &N) { Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); + + // Don't bother verifying the compilation directory or producer string + // as those could be empty. 
+ Assert(N.getRawFile() && isa<MDFile>(N.getRawFile()), + "invalid file", &N, N.getRawFile()); + Assert(!N.getFile()->getFilename().empty(), "invalid filename", &N, + N.getFile()); + + if (auto *Array = N.getRawEnumTypes()) { + Assert(isa<MDTuple>(Array), "invalid enum list", &N, Array); + for (Metadata *Op : N.getEnumTypes()->operands()) { + auto *Enum = dyn_cast_or_null<MDCompositeType>(Op); + Assert(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type, + "invalid enum type", &N, N.getEnumTypes(), Op); + } + } + if (auto *Array = N.getRawRetainedTypes()) { + Assert(isa<MDTuple>(Array), "invalid retained type list", &N, Array); + for (Metadata *Op : N.getRetainedTypes()->operands()) { + Assert(Op && isa<MDType>(Op), "invalid retained type", &N, Op); + } + } + if (auto *Array = N.getRawSubprograms()) { + Assert(isa<MDTuple>(Array), "invalid subprogram list", &N, Array); + for (Metadata *Op : N.getSubprograms()->operands()) { + Assert(Op && isa<MDSubprogram>(Op), "invalid subprogram ref", &N, Op); + } + } + if (auto *Array = N.getRawGlobalVariables()) { + Assert(isa<MDTuple>(Array), "invalid global variable list", &N, Array); + for (Metadata *Op : N.getGlobalVariables()->operands()) { + Assert(Op && isa<MDGlobalVariable>(Op), "invalid global variable ref", &N, + Op); + } + } + if (auto *Array = N.getRawImportedEntities()) { + Assert(isa<MDTuple>(Array), "invalid imported entity list", &N, Array); + for (Metadata *Op : N.getImportedEntities()->operands()) { + Assert(Op && isa<MDImportedEntity>(Op), "invalid imported entity ref", &N, + Op); + } + } } void Verifier::visitMDSubprogram(const MDSubprogram &N) { Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N); + Assert(isScopeRef(N, N.getRawScope()), "invalid scope", &N, N.getRawScope()); + if (auto *T = N.getRawType()) + Assert(isa<MDSubroutineType>(T), "invalid subroutine type", &N, T); + Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N, + N.getRawContainingType()); + if (auto *RawF = N.getRawFunction()) { + auto *FMD = dyn_cast<ConstantAsMetadata>(RawF); + auto *F = FMD ? FMD->getValue() : nullptr; + auto *FT = F ? dyn_cast<PointerType>(F->getType()) : nullptr; + Assert(F && FT && isa<FunctionType>(FT->getElementType()), + "invalid function", &N, F, FT); + } + if (auto *Params = N.getRawTemplateParams()) + visitTemplateParams(N, *Params); + if (auto *S = N.getRawDeclaration()) { + Assert(isa<MDSubprogram>(S) && !cast<MDSubprogram>(S)->isDefinition(), + "invalid subprogram declaration", &N, S); + } + if (auto *RawVars = N.getRawVariables()) { + auto *Vars = dyn_cast<MDTuple>(RawVars); + Assert(Vars, "invalid variable list", &N, RawVars); + for (Metadata *Op : Vars->operands()) { + Assert(Op && isa<MDLocalVariable>(Op), "invalid local variable", &N, Vars, + Op); + } + } + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); + + auto *F = N.getFunction(); + if (!F) + return; + + // Check that all !dbg attachments lead back to N (or, at least, another + // subprogram that describes the same function). + // + // FIXME: Check this incrementally while visiting !dbg attachments. + // FIXME: Only check when N is the canonical subprogram for F. + SmallPtrSet<const MDNode *, 32> Seen; + for (auto &BB : *F) + for (auto &I : BB) { + // Be careful about using MDLocation here since we might be dealing with + // broken code (this is the Verifier after all).
+ MDLocation *DL = + dyn_cast_or_null<MDLocation>(I.getDebugLoc().getAsMDNode()); + if (!DL) + continue; + if (!Seen.insert(DL).second) + continue; + + MDLocalScope *Scope = DL->getInlinedAtScope(); + if (Scope && !Seen.insert(Scope).second) + continue; + + MDSubprogram *SP = Scope ? Scope->getSubprogram() : nullptr; + if (SP && !Seen.insert(SP).second) + continue; + + // FIXME: Once N is canonical, check "SP == &N". + Assert(SP->describes(F), + "!dbg attachment points at wrong subprogram for function", &N, F, + &I, DL, Scope, SP); + } } -void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) { +void Verifier::visitMDLexicalBlockBase(const MDLexicalBlockBase &N) { Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), + "invalid local scope", &N, N.getRawScope()); +} + +void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) { + visitMDLexicalBlockBase(N); + + Assert(N.getLine() || !N.getColumn(), + "cannot have column info without line info", &N); } void Verifier::visitMDLexicalBlockFile(const MDLexicalBlockFile &N) { - Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + visitMDLexicalBlockBase(N); } void Verifier::visitMDNamespace(const MDNamespace &N) { Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N); + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope ref", &N, S); +} + +void Verifier::visitMDTemplateParameter(const MDTemplateParameter &N) { + Assert(isTypeRef(N, N.getType()), "invalid type ref", &N, N.getType()); } void Verifier::visitMDTemplateTypeParameter(const MDTemplateTypeParameter &N) { + visitMDTemplateParameter(N); + Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag", &N); } void Verifier::visitMDTemplateValueParameter( const MDTemplateValueParameter &N) { + visitMDTemplateParameter(N); + Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter || N.getTag() == dwarf::DW_TAG_GNU_template_template_param || N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack, "invalid tag", &N); } +void Verifier::visitMDVariable(const MDVariable &N) { + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope", &N, S); + Assert(isTypeRef(N, N.getRawType()), "invalid type ref", &N, N.getRawType()); + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); +} + void Verifier::visitMDGlobalVariable(const MDGlobalVariable &N) { + // Checks common to all variables. + visitMDVariable(N); + Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); + Assert(!N.getName().empty(), "missing global variable name", &N); + if (auto *V = N.getRawVariable()) { + Assert(isa<ConstantAsMetadata>(V) && + !isa<Function>(cast<ConstantAsMetadata>(V)->getValue()), + "invalid global variable ref", &N, V); + } + if (auto *Member = N.getRawStaticDataMemberDeclaration()) { + Assert(isa<MDDerivedType>(Member), "invalid static data member declaration", + &N, Member); + } } void Verifier::visitMDLocalVariable(const MDLocalVariable &N) { + // Checks common to all variables.
+ visitMDVariable(N); + Assert(N.getTag() == dwarf::DW_TAG_auto_variable || N.getTag() == dwarf::DW_TAG_arg_variable, "invalid tag", &N); + Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), + "local variable requires a valid scope", &N, N.getRawScope()); } void Verifier::visitMDExpression(const MDExpression &N) { @@ -765,12 +1083,20 @@ void Verifier::visitMDExpression(const MDExpression &N) { void Verifier::visitMDObjCProperty(const MDObjCProperty &N) { Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N); + if (auto *T = N.getRawType()) + Assert(isa<MDType>(T), "invalid type ref", &N, T); + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); } void Verifier::visitMDImportedEntity(const MDImportedEntity &N) { Assert(N.getTag() == dwarf::DW_TAG_imported_module || N.getTag() == dwarf::DW_TAG_imported_declaration, "invalid tag", &N); + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope for imported entity", &N, S); + Assert(isDIRef(N, N.getEntity()), "invalid imported entity", &N, + N.getEntity()); } void Verifier::visitComdat(const Comdat &C) { @@ -2133,7 +2459,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = - GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs); + GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); Assert(GEP.getType()->getScalarType()->isPointerTy() && @@ -2214,9 +2540,7 @@ void Verifier::visitRangeMetadata(Instruction& I, void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType()); Assert(PTy, "Load operand must be a pointer.", &LI); - Type *ElTy = PTy->getElementType(); - Assert(ElTy == LI.getType(), - "Load result type does not match pointer operand type!", &LI, ElTy); + Type *ElTy = LI.getType(); Assert(LI.getAlignment() <= Value::MaximumAlignment, "huge alignment values are unsupported", &LI); if (LI.isAtomic()) { @@ -2885,6 +3209,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { Assert(!SawFrameEscape, "multiple calls to llvm.frameescape in one function", &CI); for (Value *Arg : CI.arg_operands()) { + if (isa<ConstantPointerNull>(Arg)) + continue; // Null values are allowed as placeholders. auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); Assert(AI && AI->isStaticAlloca(), "llvm.frameescape only accepts static allocas", &CI); @@ -2909,16 +3235,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { break; } - case Intrinsic::eh_unwindhelp: { - auto *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts()); - Assert(AI && AI->isStaticAlloca(), - "llvm.eh.unwindhelp requires a static alloca", &CI); - break; - } - case Intrinsic::experimental_gc_statepoint: Assert(!CI.isInlineAsm(), "gc.statepoint support for inline assembly unimplemented", &CI); + Assert(CI.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", &CI); VerifyStatepoint(ImmutableCallSite(&CI)); break; @@ -2926,6 +3247,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { case Intrinsic::experimental_gc_result_float: case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { + Assert(CI.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", &CI); // Are we tied to a statepoint properly? 
CallSite StatepointCS(CI.getArgOperand(0)); const Function *StatepointFn = @@ -3035,6 +3358,25 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { }; } +/// \brief Carefully grab the subprogram from a local scope. +/// +/// This carefully grabs the subprogram from a local scope, avoiding the +/// built-in assertions that would typically fire. +static MDSubprogram *getSubprogram(Metadata *LocalScope) { + if (!LocalScope) + return nullptr; + + if (auto *SP = dyn_cast<MDSubprogram>(LocalScope)) + return SP; + + if (auto *LB = dyn_cast<MDLexicalBlockBase>(LocalScope)) + return getSubprogram(LB->getRawScope()); + + // Just return null; broken scope chains are checked elsewhere. + assert(!isa<MDLocalScope>(LocalScope) && "Unknown type of local scope"); + return nullptr; +} + template <class DbgIntrinsicTy> void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) { auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata(); @@ -3047,61 +3389,145 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) { Assert(isa<MDExpression>(DII.getRawExpression()), "invalid llvm.dbg." + Kind + " intrinsic expression", &DII, DII.getRawExpression()); -} -void Verifier::verifyDebugInfo() { - // Run the debug info verifier only if the regular verifier succeeds, since - // sometimes checks that have already failed will cause crashes here. - if (EverBroken || !VerifyDebugInfo) - return; + // Ignore broken !dbg attachments; they're checked elsewhere. + if (MDNode *N = DII.getDebugLoc().getAsMDNode()) + if (!isa<MDLocation>(N)) + return; - DebugInfoFinder Finder; - Finder.processModule(*M); - processInstructions(Finder); + BasicBlock *BB = DII.getParent(); + Function *F = BB ? BB->getParent() : nullptr; + + // The scopes for variables and !dbg attachments must agree. + MDLocalVariable *Var = DII.getVariable(); + MDLocation *Loc = DII.getDebugLoc(); + Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", + &DII, BB, F); + + MDSubprogram *VarSP = getSubprogram(Var->getRawScope()); + MDSubprogram *LocSP = getSubprogram(Loc->getRawScope()); + if (!VarSP || !LocSP) + return; // Broken scope chains are checked elsewhere. + + Assert(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind + + " variable and !dbg attachment", + &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc, + Loc->getScope()->getSubprogram()); +} + +template <class MapTy> +static uint64_t getVariableSize(const MDLocalVariable &V, const MapTy &Map) { + // Be careful of broken types (checked elsewhere). + const Metadata *RawType = V.getRawType(); + while (RawType) { + // Try to get the size directly. + if (auto *T = dyn_cast<MDType>(RawType)) + if (uint64_t Size = T->getSizeInBits()) + return Size; + + if (auto *DT = dyn_cast<MDDerivedType>(RawType)) { + // Look at the base type. + RawType = DT->getRawBaseType(); + continue; + } - // Verify Debug Info. - // - // NOTE: The loud braces are necessary for MSVC compatibility. - for (DICompileUnit CU : Finder.compile_units()) { - Assert(CU.Verify(), "DICompileUnit does not Verify!", CU); - } - for (DISubprogram S : Finder.subprograms()) { - Assert(S.Verify(), "DISubprogram does not Verify!", S); - } - for (DIGlobalVariable GV : Finder.global_variables()) { - Assert(GV.Verify(), "DIGlobalVariable does not Verify!", GV); - } - for (DIType T : Finder.types()) { - Assert(T.Verify(), "DIType does not Verify!", T); + if (auto *S = dyn_cast<MDString>(RawType)) { + // Don't error on missing types (checked elsewhere). 
+ RawType = Map.lookup(S); + continue; + } + + // Missing type or size. + break; } - for (DIScope S : Finder.scopes()) { - Assert(S.Verify(), "DIScope does not Verify!", S); + + // Fail gracefully. + return 0; +} + +template <class MapTy> +void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I, + const MapTy &TypeRefs) { + MDLocalVariable *V; + MDExpression *E; + if (auto *DVI = dyn_cast<DbgValueInst>(&I)) { + V = dyn_cast_or_null<MDLocalVariable>(DVI->getRawVariable()); + E = dyn_cast_or_null<MDExpression>(DVI->getRawExpression()); + } else { + auto *DDI = cast<DbgDeclareInst>(&I); + V = dyn_cast_or_null<MDLocalVariable>(DDI->getRawVariable()); + E = dyn_cast_or_null<MDExpression>(DDI->getRawExpression()); } + + // We don't know whether this intrinsic verified correctly. + if (!V || !E || !E->isValid()) + return; + + // Nothing to do if this isn't a bit piece expression. + if (!E->isBitPiece()) + return; + + // If there's no size, the type is broken, but that should be checked + // elsewhere. + uint64_t VarSize = getVariableSize(*V, TypeRefs); + if (!VarSize) + return; + + unsigned PieceSize = E->getBitPieceSize(); + unsigned PieceOffset = E->getBitPieceOffset(); + Assert(PieceSize + PieceOffset <= VarSize, + "piece is larger than or outside of variable", &I, V, E); + Assert(PieceSize != VarSize, "piece covers entire variable", &I, V, E); } -void Verifier::processInstructions(DebugInfoFinder &Finder) { - for (const Function &F : *M) - for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg)) - Finder.processLocation(*M, DILocation(MD)); - if (const CallInst *CI = dyn_cast<CallInst>(&*I)) - processCallInst(Finder, *CI); - } +void Verifier::visitUnresolvedTypeRef(const MDString *S, const MDNode *N) { + // This is in its own function so we get an error for each bad type ref (not + // just the first). + Assert(false, "unresolved type ref", S, N); } -void Verifier::processCallInst(DebugInfoFinder &Finder, const CallInst &CI) { - if (Function *F = CI.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - switch (ID) { - case Intrinsic::dbg_declare: - Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI)); - break; - case Intrinsic::dbg_value: - Finder.processValue(*M, cast<DbgValueInst>(&CI)); - break; - default: - break; - } +void Verifier::verifyTypeRefs() { + auto *CUs = M->getNamedMetadata("llvm.dbg.cu"); + if (!CUs) + return; + + // Visit all the compile units again to map the type references. + SmallDenseMap<const MDString *, const MDType *, 32> TypeRefs; + for (auto *CU : CUs->operands()) + if (auto Ts = cast<MDCompileUnit>(CU)->getRetainedTypes()) + for (MDType *Op : Ts) + if (auto *T = dyn_cast<MDCompositeType>(Op)) + if (auto *S = T->getRawIdentifier()) { + UnresolvedTypeRefs.erase(S); + TypeRefs.insert(std::make_pair(S, T)); + } + + // Verify debug info intrinsic bit piece expressions. This needs a second + // pass through the instructions, since we haven't built TypeRefs yet when + // verifying functions, and simply queuing the DbgInfoIntrinsics to evaluate + // later/now would queue up some that could be later deleted. + for (const Function &F : *M) + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) + verifyBitPieceExpression(*DII, TypeRefs); + + // Return early if all typerefs were resolved. + if (UnresolvedTypeRefs.empty()) + return; + + // Sort the unresolved references by name so the output is deterministic.
+ typedef std::pair<const MDString *, const MDNode *> TypeRef; + SmallVector<TypeRef, 32> Unresolved(UnresolvedTypeRefs.begin(), + UnresolvedTypeRefs.end()); + std::sort(Unresolved.begin(), Unresolved.end(), + [](const TypeRef &LHS, const TypeRef &RHS) { + return LHS.first->getString() < RHS.first->getString(); + }); + + // Visit the unresolved refs (printing out the errors). + for (const TypeRef &TR : Unresolved) + visitUnresolvedTypeRef(TR.first, TR.second); } //===----------------------------------------------------------------------===// diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index a6f980b..b4a7011 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -38,7 +38,6 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Signals.h" @@ -215,7 +214,8 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, } // write bitcode to it - WriteBitcodeToFile(IRLinker.getModule(), Out.os()); + WriteBitcodeToFile(IRLinker.getModule(), Out.os(), + /* ShouldPreserveUseListOrder */ true); Out.os().close(); if (Out.os().has_error()) { @@ -566,24 +566,20 @@ bool LTOCodeGenerator::optimize(bool DisableInline, return true; } -bool LTOCodeGenerator::compileOptimized(raw_ostream &out, std::string &errMsg) { +bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, + std::string &errMsg) { if (!this->determineTarget(errMsg)) return false; Module *mergedModule = IRLinker.getModule(); - // Mark which symbols can not be internalized - this->applyScopeRestrictions(); - legacy::PassManager codeGenPasses; - formatted_raw_ostream Out(out); - // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. 
codeGenPasses.add(createObjCARCContractPass()); - if (TargetMach->addPassesToEmitFile(codeGenPasses, Out, + if (TargetMach->addPassesToEmitFile(codeGenPasses, out, TargetMachine::CGFT_ObjectFile)) { errMsg = "target file type not supported"; return false; diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 49aa97d..5cdbca6 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -267,7 +267,7 @@ LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { Constant *cn = gvn->getInitializer(); if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) { if (ca->isCString()) { - name = ".objc_class_name_" + ca->getAsCString().str(); + name = (".objc_class_name_" + ca->getAsCString()).str(); return true; } } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 21edc50..03ab9fb 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1269,15 +1269,11 @@ void ModuleLinker::stripReplacedSubprograms() { if (!CompileUnits) return; for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { - DICompileUnit CU(CompileUnits->getOperand(I)); + DICompileUnit CU = cast<MDCompileUnit>(CompileUnits->getOperand(I)); assert(CU && "Expected valid compile unit"); - DITypedArray<DISubprogram> SPs(CU.getSubprograms()); - assert(SPs && "Expected valid subprogram array"); - - for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) { - DISubprogram SP = SPs.getElement(S); - if (!SP || !SP.getFunction() || !Functions.count(SP.getFunction())) + for (MDSubprogram *SP : CU->getSubprograms()) { + if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction())) continue; // Prevent DebugInfoFinder from tagging this as the canonical subprogram, diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index c99a3ee..8cb01c4 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -79,17 +79,6 @@ public: uint8_t other, uint32_t shndx, bool Reserved); }; -struct ELFRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbol *Symbol; // The symbol to relocate with. - unsigned Type; // The type of the relocation. - uint64_t Addend; // The addend to use. - - ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type, - uint64_t Addend) - : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {} -}; - class ELFObjectWriter : public MCObjectWriter { FragmentWriter FWriter; @@ -103,22 +92,13 @@ class ELFObjectWriter : public MCObjectWriter { static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc); static bool IsELFMetaDataSection(const MCSectionData &SD); static uint64_t DataSectionSize(const MCSectionData &SD); - static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD); static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, const MCSectionData &SD); - void WriteDataSectionData(MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCSectionELF &Section); - - /*static bool isFixupKindX86RIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - }*/ + void writeDataSectionData(MCAssembler &Asm, const MCAsmLayout &Layout, + const MCSectionData &SD); - /// ELFSymbolData - Helper struct for containing some precomputed - /// information on symbols. + /// Helper struct for containing some precomputed information on symbols. 
struct ELFSymbolData { MCSymbolData *SymbolData; uint64_t StringIndex; @@ -185,7 +165,7 @@ class ELFObjectWriter : public MCObjectWriter { } public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, + ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) : MCObjectWriter(OS, IsLittleEndian), FWriter(IsLittleEndian), TargetObjectWriter(MOTW), NeedsGOT(false) {} @@ -204,7 +184,7 @@ class ELFObjectWriter : public MCObjectWriter { MCObjectWriter::reset(); } - virtual ~ELFObjectWriter(); + ~ELFObjectWriter() override; void WriteWord(uint64_t W) { if (is64Bit()) @@ -218,7 +198,6 @@ class ELFObjectWriter : public MCObjectWriter { } void WriteHeader(const MCAssembler &Asm, - uint64_t SectionDataSize, unsigned NumberOfSections); void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, @@ -245,8 +224,6 @@ class ELFObjectWriter : public MCObjectWriter { typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy; // Map from a signature symbol to the group section typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy; - // Map from a section to the section with the relocations - typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy; // Map from a section to its offset typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy; @@ -255,23 +232,18 @@ class ELFObjectWriter : public MCObjectWriter { /// \param Asm - The assembler. /// \param SectionIndexMap - Maps a section to its index. /// \param RevGroupMap - Maps a signature symbol to the group section. - /// \param NumRegularSections - Number of non-relocation sections. void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap, - unsigned NumRegularSections); + const RevGroupMapTy &RevGroupMap); - void computeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap); + void computeIndexMap(MCAssembler &Asm, SectionIndexMapTy &SectionIndexMap); MCSectionData *createRelocationSection(MCAssembler &Asm, const MCSectionData &SD); void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout); - void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, - const RelMapTy &RelMap); + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout); void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, SectionIndexMapTy &SectionIndexMap); @@ -279,23 +251,18 @@ class ELFObjectWriter : public MCObjectWriter { // Create the sections that show up in the symbol table. Currently // those are the .note.GNU-stack section and the group sections. 
void createIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap); + GroupMapTy &GroupMap, RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap); void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; - void writeSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap, + void writeSectionHeader(ArrayRef<const MCSectionELF *> Sections, + MCAssembler &Asm, const GroupMapTy &GroupMap, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, const SectionOffsetMapTy &SectionOffsetMap); - void ComputeSectionOrder(MCAssembler &Asm, - std::vector<const MCSectionELF*> &Sections); - void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, uint64_t Address, uint64_t Offset, uint64_t Size, uint32_t Link, uint32_t Info, @@ -308,6 +275,7 @@ class ELFObjectWriter : public MCObjectWriter { bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const override; @@ -317,7 +285,6 @@ class ELFObjectWriter : public MCObjectWriter { void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeSection(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, uint64_t Alignment, const MCSectionELF &Section); @@ -384,8 +351,6 @@ void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value, uint16_t Index = LargeIndex ? uint16_t(ELF::SHN_XINDEX) : shndx; - raw_svector_ostream OS(SymtabF->getContents()); - if (Is64Bit) { write(*SymtabF, name); // st_name write(*SymtabF, info); // st_info @@ -438,7 +403,6 @@ ELFObjectWriter::~ELFObjectWriter() // Emit the ELF header. void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, - uint64_t SectionDataSize, unsigned NumberOfSections) { // ELF Header // ---------- @@ -472,8 +436,7 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, Write32(ELF::EV_CURRENT); // e_version WriteWord(0); // e_entry, no entry point in .o file WriteWord(0); // e_phoff, no program header for .o - WriteWord(SectionDataSize + (is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : - sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes + WriteWord(0); // e_shoff = sec hdr table off in bytes // e_flags = whatever the target wants Write32(Asm.getELFHeaderEFlags()); @@ -628,7 +591,7 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, if (ESize) { int64_t Res; - if (!ESize->EvaluateAsAbsolute(Res, Layout)) + if (!ESize->evaluateKnownAbsolute(Res, Layout)) report_fatal_error("Size expression must be absolute."); Size = Res; } @@ -969,8 +932,7 @@ bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) { } void ELFObjectWriter::computeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap) { + SectionIndexMapTy &SectionIndexMap) { unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { @@ -994,17 +956,15 @@ void ELFObjectWriter::computeIndexMap(MCAssembler &Asm, if (MCSectionData *RelSD = createRelocationSection(Asm, SD)) { const MCSectionELF *RelSection = static_cast<const MCSectionELF *>(&RelSD->getSection()); - RelMap[RelSection] = &Section; SectionIndexMap[RelSection] = Index++; } } } -void -ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap, - unsigned NumRegularSections) { +void ELFObjectWriter::computeSymbolTable( + MCAssembler &Asm, const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const RevGroupMapTy &RevGroupMap) { // FIXME: Is this the correct place to do this? // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed? if (NeedsGOT) { @@ -1167,15 +1127,12 @@ ELFObjectWriter::createRelocationSection(MCAssembler &Asm, EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); unsigned Flags = 0; - StringRef Group = ""; - if (Section.getFlags() & ELF::SHF_GROUP) { + if (Section.getFlags() & ELF::SHF_GROUP) Flags = ELF::SHF_GROUP; - Group = Section.getGroup()->getName(); - } - const MCSectionELF *RelaSection = Ctx.getELFSection( + const MCSectionELF *RelaSection = Ctx.createELFRelSection( RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL, - Flags, EntrySize, Group, true); + Flags, EntrySize, Section.getGroup(), &Section); return &Asm.getOrCreateSectionData(*RelaSection); } @@ -1324,8 +1281,7 @@ void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm, } } -void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, - const RelMapTy &RelMap) { +void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) { for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { MCSectionData &RelSD = *it; const MCSectionELF &RelSection = @@ -1335,7 +1291,7 @@ void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) continue; - const MCSectionELF *Section = RelMap.lookup(&RelSection); + const MCSectionELF *Section = RelSection.getAssociatedSection(); MCSectionData &SD = Asm.getOrCreateSectionData(*Section); RelSD.setAlignment(is64Bit() ? 8 : 4); @@ -1362,31 +1318,14 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, WriteWord(EntrySize); // sh_entsize } -// ELF doesn't require relocations to be in any order. We sort by the r_offset, -// just to match gnu as for easier comparison. The use type is an arbitrary way -// of making the sort deterministic. 
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { - const ELFRelocationEntry &A = *AP; - const ELFRelocationEntry &B = *BP; - if (A.Offset != B.Offset) - return B.Offset - A.Offset; - if (B.Type != A.Type) - return A.Type - B.Type; - //llvm_unreachable("ELFRelocs might be unstable!"); - return 0; -} - -static void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); -} - void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F, const MCSectionData *SD) { std::vector<ELFRelocationEntry> &Relocs = Relocations[SD]; - sortRelocs(Asm, Relocs); + // Sort the relocation entries. Most targets just sort by Offset, but some + // (e.g., MIPS) have additional constraints. + TargetObjectWriter->sortRelocs(Asm, Relocs); for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const ELFRelocationEntry &Entry = Relocs[e - i - 1]; @@ -1473,12 +1412,9 @@ void ELFObjectWriter::CreateMetadataSections( ShStrTabBuilder.data().end()); } -void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, - MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap) { +void ELFObjectWriter::createIndexedSections( + MCAssembler &Asm, MCAsmLayout &Layout, GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap, SectionIndexMapTy &SectionIndexMap) { MCContext &Ctx = Asm.getContext(); // Build the groups @@ -1502,7 +1438,7 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, GroupMap[Group] = SignatureSymbol; } - computeIndexMap(Asm, SectionIndexMap, RelMap); + computeIndexMap(Asm, SectionIndexMap); // Add sections to the groups for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); @@ -1522,7 +1458,6 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, void ELFObjectWriter::writeSection(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, uint64_t Alignment, @@ -1531,16 +1466,19 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm, uint64_t sh_info = 0; switch(Section.getType()) { + default: + // Nothing to do. + break; + case ELF::SHT_DYNAMIC: sh_link = ShStrTabBuilder.getOffset(Section.getSectionName()); - sh_info = 0; break; case ELF::SHT_REL: case ELF::SHT_RELA: { sh_link = SymbolTableIndex; assert(sh_link && ".symtab not found"); - const MCSectionELF *InfoSection = RelMap.find(&Section)->second; + const MCSectionELF *InfoSection = Section.getAssociatedSection(); sh_info = SectionIndexMap.lookup(InfoSection); break; } @@ -1555,45 +1493,15 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm, sh_link = SymbolTableIndex; break; - case ELF::SHT_PROGBITS: - case ELF::SHT_STRTAB: - case ELF::SHT_NOBITS: - case ELF::SHT_NOTE: - case ELF::SHT_NULL: - case ELF::SHT_ARM_ATTRIBUTES: - case ELF::SHT_INIT_ARRAY: - case ELF::SHT_FINI_ARRAY: - case ELF::SHT_PREINIT_ARRAY: - case ELF::SHT_X86_64_UNWIND: - case ELF::SHT_MIPS_REGINFO: - case ELF::SHT_MIPS_OPTIONS: - case ELF::SHT_MIPS_ABIFLAGS: - // Nothing to do. 
- break; - case ELF::SHT_GROUP: sh_link = SymbolTableIndex; sh_info = GroupSymbolIndex; break; - - default: - llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && - Section.getType() == ELF::SHT_ARM_EXIDX) { - StringRef SecName(Section.getSectionName()); - if (SecName == ".ARM.exidx") { - sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection( - ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC)); - } else if (SecName.startswith(".ARM.exidx")) { - StringRef GroupName = - Section.getGroup() ? Section.getGroup()->getName() : ""; - sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection( - SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, 0, GroupName)); - } - } + Section.getType() == ELF::SHT_ARM_EXIDX) + sh_link = SectionIndexMap.lookup(Section.getAssociatedSection()); WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()), Section.getType(), @@ -1617,13 +1525,6 @@ uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) { return Ret; } -uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { - if (IsELFMetaDataSection(SD)) - return DataSectionSize(SD); - return Layout.getSectionFileSize(&SD); -} - uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) @@ -1631,14 +1532,9 @@ uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, return Layout.getSectionAddressSize(&SD); } -void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, +void ELFObjectWriter::writeDataSectionData(MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSectionELF &Section) { - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment()); - WriteZeros(Padding); - + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) { for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; ++i) { @@ -1652,28 +1548,20 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, } void ELFObjectWriter::writeSectionHeader( - MCAssembler &Asm, const GroupMapTy &GroupMap, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, const RelMapTy &RelMap, + ArrayRef<const MCSectionELF *> Sections, MCAssembler &Asm, + const GroupMapTy &GroupMap, const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, const SectionOffsetMapTy &SectionOffsetMap) { - const unsigned NumSections = Asm.size() + 1; - - std::vector<const MCSectionELF*> Sections; - Sections.resize(NumSections - 1); - - for (SectionIndexMapTy::const_iterator i= - SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { - const std::pair<const MCSectionELF*, uint32_t> &p = *i; - Sections[p.second - 1] = p.first; - } + const unsigned NumSections = Asm.size(); // Null section first. uint64_t FirstSectionSize = - NumSections >= ELF::SHN_LORESERVE ? NumSections : 0; + (NumSections + 1) >= ELF::SHN_LORESERVE ? NumSections + 1 : 0; uint32_t FirstSectionLink = ShstrtabIndex >= ELF::SHN_LORESERVE ? 
ShstrtabIndex : 0; WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0); - for (unsigned i = 0; i < NumSections - 1; ++i) { + for (unsigned i = 0; i < NumSections; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); uint32_t GroupSymbolIndex; @@ -1685,39 +1573,9 @@ void ELFObjectWriter::writeSectionHeader( uint64_t Size = GetSectionAddressSize(Layout, SD); - writeSection(Asm, SectionIndexMap, RelMap, GroupSymbolIndex, - SectionOffsetMap.lookup(&Section), Size, - SD.getAlignment(), Section); - } -} - -void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm, - std::vector<const MCSectionELF*> &Sections) { - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() == ELF::SHT_GROUP) - Sections.push_back(&Section); - } - - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() != ELF::SHT_GROUP && - Section.getType() != ELF::SHT_REL && - Section.getType() != ELF::SHT_RELA) - Sections.push_back(&Section); - } - - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() == ELF::SHT_REL || - Section.getType() == ELF::SHT_RELA) - Sections.push_back(&Section); + writeSection(Asm, SectionIndexMap, GroupSymbolIndex, + SectionOffsetMap.lookup(&Section), Size, SD.getAlignment(), + Section); } } @@ -1727,102 +1585,77 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, RevGroupMapTy RevGroupMap; SectionIndexMapTy SectionIndexMap; - unsigned NumUserSections = Asm.size(); - CompressDebugSections(Asm, const_cast<MCAsmLayout &>(Layout)); - - DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap; - const unsigned NumUserAndRelocSections = Asm.size(); - createIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap, - RevGroupMap, SectionIndexMap, RelMap); - const unsigned AllSections = Asm.size(); - const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections; - - unsigned NumRegularSections = NumUserSections + NumIndexedSections; + createIndexedSections(Asm, const_cast<MCAsmLayout &>(Layout), GroupMap, + RevGroupMap, SectionIndexMap); // Compute symbol table information. - computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap, - NumRegularSections); + computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap); - WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap); + WriteRelocations(Asm, const_cast<MCAsmLayout &>(Layout)); CreateMetadataSections(const_cast<MCAssembler&>(Asm), const_cast<MCAsmLayout&>(Layout), SectionIndexMap); - uint64_t NaturalAlignment = is64Bit() ? 8 : 4; - uint64_t HeaderSize = is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : - sizeof(ELF::Elf32_Ehdr); - uint64_t FileOff = HeaderSize; - + unsigned NumSections = Asm.size(); std::vector<const MCSectionELF*> Sections; - ComputeSectionOrder(Asm, Sections); - unsigned NumSections = Sections.size(); - SectionOffsetMapTy SectionOffsetMap; - for (unsigned i = 0; i < NumRegularSections + 1; ++i) { - const MCSectionELF &Section = *Sections[i]; - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + Sections.resize(NumSections); - FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); - - // Remember the offset into the file for this section. - SectionOffsetMap[&Section] = FileOff; + for (auto &Pair : SectionIndexMap) + Sections[Pair.second - 1] = Pair.first; - // Get the size of the section in the output file (including padding). - FileOff += GetSectionFileSize(Layout, SD); - } - - FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); - - const unsigned SectionHeaderOffset = FileOff - HeaderSize; + SectionOffsetMapTy SectionOffsetMap; - uint64_t SectionHeaderEntrySize = is64Bit() ? - sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr); - FileOff += (NumSections + 1) * SectionHeaderEntrySize; + // Write out the ELF header ... + WriteHeader(Asm, NumSections + 1); - for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) { + // ... then the sections ... + for (unsigned i = 0; i < NumSections; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment()); + WriteZeros(Padding); // Remember the offset into the file for this section. - SectionOffsetMap[&Section] = FileOff; + SectionOffsetMap[&Section] = OS.tell(); - // Get the size of the section in the output file (including padding). - FileOff += GetSectionFileSize(Layout, SD); + writeDataSectionData(Asm, Layout, SD); } - // Write out the ELF header ... - WriteHeader(Asm, SectionHeaderOffset, NumSections + 1); - - // ... then the regular sections ... - // + because of .shstrtab - for (unsigned i = 0; i < NumRegularSections + 1; ++i) - WriteDataSectionData(Asm, Layout, *Sections[i]); - + uint64_t NaturalAlignment = is64Bit() ? 8 : 4; uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment); WriteZeros(Padding); + const unsigned SectionHeaderOffset = OS.tell(); + // ... then the section header table ... - writeSectionHeader(Asm, GroupMap, Layout, SectionIndexMap, RelMap, + writeSectionHeader(Sections, Asm, GroupMap, Layout, SectionIndexMap, SectionOffsetMap); - // ... and then the remaining sections ... 
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) - WriteDataSectionData(Asm, Layout, *Sections[i]); + if (is64Bit()) { + uint64_t Val = SectionHeaderOffset; + if (sys::IsLittleEndianHost != IsLittleEndian) + sys::swapByteOrder(Val); + OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val), + offsetof(ELF::Elf64_Ehdr, e_shoff)); + } else { + uint32_t Val = SectionHeaderOffset; + if (sys::IsLittleEndianHost != IsLittleEndian) + sys::swapByteOrder(Val); + OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val), + offsetof(ELF::Elf32_Ehdr, e_shoff)); + } } -bool -ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const { - if (::isWeak(DataA)) +bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { + if (!InSet && (::isWeak(DataA) || (DataB && ::isWeak(*DataB)))) return false; return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( - Asm, DataA, FB,InSet, IsPCRel); + Asm, DataA, DataB, FB, InSet, IsPCRel); } bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const { @@ -1830,7 +1663,7 @@ bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const { } MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &OS, + raw_pwrite_stream &OS, bool IsLittleEndian) { return new ELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 62f5279..144d355 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -36,11 +36,10 @@ using namespace llvm; namespace { -class MCAsmStreamer : public MCStreamer { -protected: +class MCAsmStreamer final : public MCStreamer { + std::unique_ptr<formatted_raw_ostream> OSOwner; formatted_raw_ostream &OS; const MCAsmInfo *MAI; -private: std::unique_ptr<MCInstPrinter> InstPrinter; std::unique_ptr<MCCodeEmitter> Emitter; std::unique_ptr<MCAsmBackend> AsmBackend; @@ -57,14 +56,15 @@ private: void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; public: - MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, + MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os, bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *printer, MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst) - : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), - InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), - CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) { + : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner), + MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), + AsmBackend(asmbackend), CommentStream(CommentToEmit), + IsVerboseAsm(isVerboseAsm), ShowInst(showInst), + UseDwarfDirectory(useDwarfDirectory) { if (InstPrinter && IsVerboseAsm) InstPrinter->setCommentStream(CommentStream); } @@ -1262,7 +1262,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &S // If we have an AsmPrinter, use that to print, otherwise print the MCInst. 
if (InstPrinter) - InstPrinter->printInst(&Inst, OS, ""); + InstPrinter->printInst(&Inst, OS, "", STI); else Inst.print(OS); EmitEOL(); @@ -1314,10 +1314,10 @@ void MCAsmStreamer::FinishImpl() { } MCStreamer *llvm::createAsmStreamer(MCContext &Context, - formatted_raw_ostream &OS, + std::unique_ptr<formatted_raw_ostream> OS, bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *IP, MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { - return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE, - MAB, ShowInst); + return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm, + useDwarfDirectory, IP, CE, MAB, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 857eafc..d09e383 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -229,8 +229,9 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { return getSectionAddressSize(SD); } -uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F, - uint64_t FOffset, uint64_t FSize) { +uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, + const MCFragment *F, + uint64_t FOffset, uint64_t FSize) { uint64_t BundleSize = Assembler.getBundleAlignSize(); assert(BundleSize > 0 && "computeBundlePadding should only be called if bundling is enabled"); @@ -332,6 +333,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { getFragmentList().insert(IP, F); F->setParent(this); } + return IP; } @@ -497,14 +499,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, } else { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol &SA = A->getSymbol(); - if (A->getKind() != MCSymbolRefExpr::VK_None || - SA.AliasedSymbol().isUndefined()) { + if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) { IsResolved = false; } else { const MCSymbolData &DataA = getSymbolData(SA); - IsResolved = - getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA, - *DF, false, true); + IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl( + *this, DataA, nullptr, *DF, false, true); } } } else { @@ -514,12 +514,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, Value = Target.getConstant(); if (const MCSymbolRefExpr *A = Target.getSymA()) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); if (Sym.isDefined()) Value += Layout.getSymbolOffset(&getSymbolData(Sym)); } if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbol &Sym = B->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = B->getSymbol(); if (Sym.isDefined()) Value -= Layout.getSymbolOffset(&getSymbolData(Sym)); } @@ -634,7 +634,12 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { // The fragment's offset will point to after the padding, and its computed // size won't include the padding. // - if (Assembler.isBundlingEnabled() && F->hasInstructions()) { + // When the -mc-relax-all flag is used, we optimize bundling by writing the + // bundle padding directly into fragments when the instructions are emitted + // inside the streamer.
+ // + if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() && + F->hasInstructions()) { assert(isa<MCEncodedFragment>(F) && "Only MCEncodedFragment implementations have instructions"); uint64_t FSize = Assembler.computeFragmentSize(*this, *F); @@ -642,7 +647,8 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { if (FSize > Assembler.getBundleAlignSize()) report_fatal_error("Fragment can't be larger than a bundle size"); - uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize); + uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F, + F->Offset, FSize); if (RequiredBundlePadding > UINT8_MAX) report_fatal_error("Padding cannot exceed 255 bytes"); F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); @@ -657,24 +663,18 @@ static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) { OW->WriteBytes(EF.getContents()); } -/// \brief Write the fragment \p F to the output file. -static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment &F) { - MCObjectWriter *OW = &Asm.getWriter(); - - // FIXME: Embed in fragments instead? - uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); - +void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize, + MCObjectWriter *OW) const { // Should NOP padding be written out before this fragment? unsigned BundlePadding = F.getBundlePadding(); if (BundlePadding > 0) { - assert(Asm.isBundlingEnabled() && + assert(isBundlingEnabled() && "Writing bundle padding with disabled bundling"); assert(F.hasInstructions() && "Writing bundle padding for a fragment without instructions"); - unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize); - if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) { + unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize); + if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) { // If the padding itself crosses a bundle boundary, it must be emitted // in 2 pieces, since even nop instructions must not cross boundaries. // v--------------v <- BundleAlignSize @@ -683,16 +683,27 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, // | Prev |####|####| F | // ---------------------------- // ^-------------------^ <- TotalLength - unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize(); - if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW)) + unsigned DistanceToBoundary = TotalLength - getBundleAlignSize(); + if (!getBackend().writeNopData(DistanceToBoundary, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(DistanceToBoundary) + " bytes"); BundlePadding -= DistanceToBoundary; } - if (!Asm.getBackend().writeNopData(BundlePadding, OW)) + if (!getBackend().writeNopData(BundlePadding, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(BundlePadding) + " bytes"); } +} + +/// \brief Write the fragment \p F to the output file. +static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F) { + MCObjectWriter *OW = &Asm.getWriter(); + + // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); + + Asm.writeFragmentPadding(F, FragmentSize, OW); // This variable (and its dummy usage) is to participate in the assert at // the end of the function. 
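The bundle-padding computation that the MCAssembler.cpp hunks above turn into a free function (so the streamer can call it too) is easier to follow in isolation. The following is a minimal standalone sketch of that rule, not LLVM's API: the name sketchComputeBundlePadding, the AlignToBundleEnd flag, and the example values are illustrative assumptions, and the sketch presumes (as the surrounding code enforces) that a fragment is never larger than the bundle size.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Sketch only: mirrors the padding rule, not LLVM's signatures.
static uint64_t sketchComputeBundlePadding(uint64_t BundleSize, uint64_t FOffset,
                                           uint64_t FSize, bool AlignToBundleEnd) {
  assert(BundleSize > 0 && (BundleSize & (BundleSize - 1)) == 0 &&
         "bundle size must be a power of 2");
  uint64_t BundleMask = BundleSize - 1;
  uint64_t OffsetInBundle = FOffset & BundleMask;
  uint64_t EndOfFragment = OffsetInBundle + FSize;

  if (AlignToBundleEnd) {
    // Pad so the fragment ends exactly on a bundle boundary.
    return EndOfFragment == BundleSize
               ? 0
               : BundleSize - (EndOfFragment & BundleMask);
  }
  // Otherwise pad only if the fragment would straddle a boundary.
  return EndOfFragment > BundleSize ? BundleSize - OffsetInBundle : 0;
}

int main() {
  // A 5-byte fragment at offset 14 with 16-byte bundles would straddle the
  // boundary at 16, so it needs 2 bytes of padding to start at offset 16.
  printf("%llu\n",
         (unsigned long long)sketchComputeBundlePadding(16, 14, 5, false));
  return 0;
}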
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 3cb3ea1..5f8e3c1 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -139,6 +139,11 @@ MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName, "$frame_escape_" + Twine(Idx)); } +MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) { + return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName + + "$parent_frame_offset"); +} + MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { // Determine whether this is an assembler temporary or normal label, if used. bool IsTemporary = false; @@ -257,41 +262,63 @@ MCContext::getMachOSection(StringRef Segment, StringRef Section, Reserved2, Kind, Begin); } -const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, - unsigned Flags, - const char *BeginSymName) { - return getELFSection(Section, Type, Flags, 0, "", BeginSymName); -} - void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) { StringRef GroupName; if (const MCSymbol *Group = Section->getGroup()) GroupName = Group->getName(); - ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName)); - auto I = - ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName), - Section)).first; - StringRef CachedName = I->first.first; + unsigned UniqueID = Section->getUniqueID(); + ELFUniquingMap.erase( + ELFSectionKey{Section->getSectionName(), GroupName, UniqueID}); + auto I = ELFUniquingMap.insert(std::make_pair( + ELFSectionKey{Name, GroupName, UniqueID}, + Section)).first; + StringRef CachedName = I->first.SectionName; const_cast<MCSectionELF*>(Section)->setSectionName(CachedName); } +const MCSectionELF * +MCContext::createELFRelSection(StringRef Name, unsigned Type, unsigned Flags, + unsigned EntrySize, const MCSymbol *Group, + const MCSectionELF *Associated) { + StringMap<bool>::iterator I; + bool Inserted; + std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true)); + + return new (*this) + MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(), + EntrySize, Group, true, nullptr, Associated); +} + const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, - StringRef Group, bool Unique, + StringRef Group, unsigned UniqueID, const char *BeginSymName) { + MCSymbol *GroupSym = nullptr; + if (!Group.empty()) + GroupSym = GetOrCreateSymbol(Group); + + return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID, + BeginSymName, nullptr); +} + +const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, + unsigned Flags, unsigned EntrySize, + const MCSymbol *GroupSym, + unsigned UniqueID, + const char *BeginSymName, + const MCSectionELF *Associated) { + StringRef Group = ""; + if (GroupSym) + Group = GroupSym->getName(); // Do the lookup, if we have a hit, return it. 
auto IterBool = ELFUniquingMap.insert( - std::make_pair(SectionGroupPair(Section, Group), nullptr)); + std::make_pair(ELFSectionKey{Section, Group, UniqueID}, nullptr)); auto &Entry = *IterBool.first; - if (!IterBool.second && !Unique) + if (!IterBool.second) return Entry.second; - MCSymbol *GroupSym = nullptr; - if (!Group.empty()) - GroupSym = GetOrCreateSymbol(Group); - - StringRef CachedName = Entry.first.first; + StringRef CachedName = Entry.first.SectionName; SectionKind Kind; if (Flags & ELF::SHF_EXECINSTR) @@ -303,25 +330,17 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - MCSectionELF *Result = new (*this) MCSectionELF( - CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique, Begin); - if (!Unique) - Entry.second = Result; + MCSectionELF *Result = + new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, + GroupSym, UniqueID, Begin, Associated); + Entry.second = Result; return Result; } -const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, - unsigned Flags, unsigned EntrySize, - StringRef Group, - const char *BeginSymName) { - return getELFSection(Section, Type, Flags, EntrySize, Group, false, - BeginSymName); -} - const MCSectionELF *MCContext::CreateELFGroupSection() { MCSectionELF *Result = new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, - nullptr, false, nullptr); + nullptr, ~0, nullptr, nullptr); return Result; } @@ -329,23 +348,24 @@ const MCSectionCOFF * MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, int Selection, const char *BeginSymName) { - // Do the lookup, if we have a hit, return it. + MCSymbol *COMDATSymbol = nullptr; + if (!COMDATSymName.empty()) { + COMDATSymbol = GetOrCreateSymbol(COMDATSymName); + COMDATSymName = COMDATSymbol->getName(); + } - SectionGroupTriple T(Section, COMDATSymName, Selection); + // Do the lookup, if we have a hit, return it. + COFFSectionKey T{Section, COMDATSymName, Selection}; auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr)); auto Iter = IterBool.first; if (!IterBool.second) return Iter->second; - MCSymbol *COMDATSymbol = nullptr; - if (!COMDATSymName.empty()) - COMDATSymbol = GetOrCreateSymbol(COMDATSymName); - MCSymbol *Begin = nullptr; if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - StringRef CachedName = std::get<0>(Iter->first); + StringRef CachedName = Iter->first.SectionName; MCSectionCOFF *Result = new (*this) MCSectionCOFF( CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin); @@ -361,7 +381,7 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, } const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { - SectionGroupTriple T(Section, "", 0); + COFFSectionKey T{Section, "", 0}; auto Iter = COFFUniquingMap.find(T); if (Iter == COFFUniquingMap.end()) return nullptr; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index d9f01d0..716d76a 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -33,22 +33,22 @@ using namespace llvm; // disassembler context. If not, it returns NULL. 
// LLVMDisasmContextRef -LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, +LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU, const char *Features, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { // Get the target. std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); + const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); if (!TheTarget) return nullptr; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT); if (!MRI) return nullptr; // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT); if (!MAI) return nullptr; @@ -56,8 +56,8 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, if (!MII) return nullptr; - const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, - Features); + const MCSubtargetInfo *STI = + TheTarget->createMCSubtargetInfo(TT, CPU, Features); if (!STI) return nullptr; @@ -72,25 +72,24 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, return nullptr; std::unique_ptr<MCRelocationInfo> RelInfo( - TheTarget->createMCRelocationInfo(Triple, *Ctx)); + TheTarget->createMCRelocationInfo(TT, *Ctx)); if (!RelInfo) return nullptr; std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer( - Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo))); + TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo))); DisAsm->setSymbolizer(std::move(Symbolizer)); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); - MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, - *MAI, *MII, *MRI, *STI); + MCInstPrinter *IP = TheTarget->createMCInstPrinter( + Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI); if (!IP) return nullptr; - LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, - GetOpInfo, SymbolLookUp, - TheTarget, MAI, MRI, - STI, MII, Ctx, DisAsm, IP); + LLVMDisasmContext *DC = + new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, + TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); if (!DC) return nullptr; @@ -98,19 +97,19 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, return DC; } -LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, - void *DisInfo, int TagType, - LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp){ - return LLVMCreateDisasmCPUFeatures(Triple, CPU, "", DisInfo, TagType, - GetOpInfo, SymbolLookUp); +LLVMDisasmContextRef +LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp) { + return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo, + SymbolLookUp); } -LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo, +LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { - return LLVMCreateDisasmCPUFeatures(Triple, "", "", DisInfo, TagType, - GetOpInfo, SymbolLookUp); + return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo, + SymbolLookUp); } // @@ -268,7 +267,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, SmallVector<char, 64> InsnStr; 
raw_svector_ostream OS(InsnStr); formatted_raw_ostream FormattedOS(OS); - IP->printInst(&Inst, FormattedOS, AnnotationsStr); + IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo()); if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency) emitLatency(DC, Inst); @@ -312,11 +311,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ const MCAsmInfo *MAI = DC->getAsmInfo(); const MCInstrInfo *MII = DC->getInstrInfo(); const MCRegisterInfo *MRI = DC->getRegisterInfo(); - const MCSubtargetInfo *STI = DC->getSubtargetInfo(); int AsmPrinterVariant = MAI->getAssemblerDialect(); AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0; MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter( - AsmPrinterVariant, *MAI, *MII, *MRI, *STI); + Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI); if (IP) { DC->setIP(IP); DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 87e7ed1..e9f685e 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -803,7 +803,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); - for (const auto sec : Sections) { + for (const auto &sec : Sections) { MCSymbol *StartSymbol = sec.second.first; MCSymbol *EndSymbol = sec.second.second; diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 84176dc..dc3d6c3 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -28,3 +28,24 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { return false; } + +// ELF doesn't require relocations to be in any order. We sort by the Offset, +// just to match gnu as for easier comparison. The use type is an arbitrary way +// of making the sort deterministic. 
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { + const ELFRelocationEntry &A = *AP; + const ELFRelocationEntry &B = *BP; + if (A.Offset != B.Offset) + return B.Offset - A.Offset; + if (B.Type != A.Type) + return A.Type - B.Type; + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; +} + + +void +MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); +} diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index cdf5033..aa05390 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -40,6 +41,48 @@ using namespace llvm; MCELFStreamer::~MCELFStreamer() { } +void MCELFStreamer::mergeFragment(MCDataFragment *DF, + MCEncodedFragmentWithFixups *EF) { + MCAssembler &Assembler = getAssembler(); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + uint64_t FSize = EF->getContents().size(); + + if (FSize > Assembler.getBundleAlignSize()) + report_fatal_error("Fragment can't be larger than a bundle size"); + + uint64_t RequiredBundlePadding = computeBundlePadding( + Assembler, EF, DF->getContents().size(), FSize); + + if (RequiredBundlePadding > UINT8_MAX) + report_fatal_error("Padding cannot exceed 255 bytes"); + + if (RequiredBundlePadding > 0) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS); + + EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); + + Assembler.writeFragmentPadding(*EF, FSize, OW); + VecOS.flush(); + delete OW; + + DF->getContents().append(Code.begin(), Code.end()); + } + } + + flushPendingLabels(DF, DF->getContents().size()); + + for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) { + EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() + + DF->getContents().size()); + DF->getFixups().push_back(EF->getFixups()[i]); + } + DF->setHasInstructions(true); + DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); +} + void MCELFStreamer::InitSections(bool NoExecStack) { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. @@ -449,7 +492,16 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst, if (Assembler.isBundlingEnabled()) { MCSectionData *SD = getCurrentSectionData(); - if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) + if (Assembler.getRelaxAll() && SD->isBundleLocked()) + // If the -mc-relax-all flag is used and we are bundle-locked, we re-use + // the current bundle group. + DF = BundleGroups.back(); + else if (Assembler.getRelaxAll() && !SD->isBundleLocked()) + // When not in a bundle-locked group and the -mc-relax-all flag is used, + // we create a new temporary fragment which will be later merged into + // the current fragment. + DF = new MCDataFragment(); + else if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) // If we are bundle-locked, we re-use the current fragment. // The bundle-locking directive ensures this is a new data fragment. 
DF = cast<MCDataFragment>(getCurrentFragment()); @@ -487,6 +539,14 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst, } DF->setHasInstructions(true); DF->getContents().append(Code.begin(), Code.end()); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + MCSectionData *SD = getCurrentSectionData(); + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + delete DF; + } + } } void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) { @@ -510,6 +570,12 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) { if (!SD->isBundleLocked()) SD->setBundleGroupBeforeFirstInst(true); + if (getAssembler().getRelaxAll() && !SD->isBundleLocked()) { + // TODO: drop the lock state and set directly in the fragment + MCDataFragment *DF = new MCDataFragment(); + BundleGroups.push_back(DF); + } + SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd : MCSectionData::BundleLocked); } @@ -525,7 +591,27 @@ void MCELFStreamer::EmitBundleUnlock() { else if (SD->isBundleGroupBeforeFirstInst()) report_fatal_error("Empty bundle-locked group is forbidden"); - SD->setBundleLockState(MCSectionData::NotBundleLocked); + // When the -mc-relax-all flag is used, we emit instructions to fragments + // stored on a stack. When the bundle unlock is emitted, we pop a fragment + // from the stack and merge it into the one below. + if (getAssembler().getRelaxAll()) { + assert(!BundleGroups.empty() && "There are no bundle groups"); + MCDataFragment *DF = BundleGroups.back(); + + // FIXME: Use BundleGroups to track the lock state instead. + SD->setBundleLockState(MCSectionData::NotBundleLocked); + + // FIXME: Use more separate fragments for nested groups. + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + BundleGroups.pop_back(); + delete DF; + } + + if (SD->getBundleLockState() != MCSectionData::BundleLockedAlignToEnd) + getOrCreateDataFragment()->setAlignToBundleEnd(false); + } else + SD->setBundleLockState(MCSectionData::NotBundleLocked); } void MCELFStreamer::Flush() { @@ -561,7 +647,7 @@ void MCELFStreamer::FinishImpl() { } MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll) { MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); if (RelaxAll) diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8a64403..0702539 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -775,6 +775,10 @@ const MCSection *MCExpr::FindAssociatedSection() const { if (RHS_S == MCSymbol::AbsolutePseudoSection) return LHS_S; + // Not always correct, but probably the best we can do without more context. + if (BE->getOpcode() == MCBinaryExpr::Sub) + return MCSymbol::AbsolutePseudoSection; + // Otherwise, return the first non-null section. return LHS_S ?
LHS_S : RHS_S; } diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index d5c7101..5c78f5f 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -54,7 +54,7 @@ private: void EmitDataRegionEnd(); public: - MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label) : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label), DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {} @@ -491,7 +491,7 @@ void MCMachOStreamer::FinishImpl() { } MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll, bool DWARFMustBeAtTheEnd, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 6aa2de3..d254e95 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -24,18 +24,13 @@ using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_) + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter_) : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, *Emitter_, *TAB.createObjectWriter(OS), OS)), CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {} -MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_, - MCAssembler *Assembler) - : MCStreamer(Context), Assembler(Assembler), CurSectionData(nullptr), - EmitEHFrame(true), EmitDebugFrame(false) {} - MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); delete &Assembler->getEmitter(); @@ -43,7 +38,7 @@ MCObjectStreamer::~MCObjectStreamer() { delete Assembler; } -void MCObjectStreamer::flushPendingLabels(MCFragment *F) { +void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) { if (PendingLabels.size()) { if (!F) { F = new MCDataFragment(); @@ -52,7 +47,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F) { } for (MCSymbolData *SD : PendingLabels) { SD->setFragment(F); - SD->setOffset(0); + SD->setOffset(FOffset); } PendingLabels.clear(); } @@ -93,7 +88,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) - if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { + if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() && + F->hasInstructions())) { F = new MCDataFragment(); insert(F); } @@ -149,7 +145,9 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { // If there is a current fragment, mark the symbol as pointing into it. // Otherwise queue the label and set its fragment pointer when we emit the // next fragment. 
- if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) { + auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (F && !(getAssembler().isBundlingEnabled() && + getAssembler().getRelaxAll())) { SD.setFragment(F); SD.setOffset(F->getContents().size()); } else { @@ -248,6 +246,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst, void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &STI) { + if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled()) + llvm_unreachable("All instructions should have already been relaxed"); + // Always create a new, separate fragment here, because its size can change // during relaxation. MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI); diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 3c536ec..e40c07d 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -27,7 +27,7 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved( const MCSymbol &SA = A->getSymbol(); const MCSymbol &SB = B->getSymbol(); - if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined()) + if (SA.isUndefined() || SB.isUndefined()) return false; const MCSymbolData &DataA = Asm.getSymbolData(SA); @@ -35,19 +35,15 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved( if(!DataA.getFragment() || !DataB.getFragment()) return false; - return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, - *DataB.getFragment(), - InSet, - false); + return IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, &DataB, *DataB.getFragment(), InSet, false); } -bool -MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const { - const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection(); +bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { + const MCSection &SecA = DataA.getSymbol().getSection(); const MCSection &SecB = FB.getParent()->getSection(); // On ELF and COFF A - B is absolute if A and B are in the same section. return &SecA == &SecB; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2bf980b..92a7507 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -175,7 +175,7 @@ private: public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); - virtual ~AsmParser(); + ~AsmParser() override; bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; @@ -4606,7 +4606,7 @@ bool AsmParser::parseMSInlineAsm( ++InputIdx; OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand.needAddressOf()); - OutputConstraints.push_back('=' + Operand.getConstraint().str()); + OutputConstraints.push_back(("=" + Operand.getConstraint()).str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); } else { InputDecls.push_back(OpDecl); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 7a120a1..a19339d 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -379,7 +379,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { const MCExpr *Subsection = nullptr; bool UseLastGroup = false; StringRef UniqueStr; - bool Unique = false; + int64_t UniqueID = ~0; // Set the defaults first. 
if (SectionName == ".fini" || SectionName == ".init" || @@ -470,7 +470,15 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { return TokError("expected identifier in directive"); if (UniqueStr != "unique") return TokError("expected 'unique'"); - Unique = true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected comma"); + Lex(); + if (getParser().parseAbsoluteExpression(UniqueID)) + return true; + if (UniqueID < 0) + return TokError("unique id must be positive"); + if (!isUInt<32>(UniqueID) || UniqueID == ~0U) + return TokError("unique id is too large"); } } } @@ -520,7 +528,7 @@ EndStmt: } const MCSection *ELFSection = getContext().getELFSection( - SectionName, Type, Flags, Size, GroupName, Unique); + SectionName, Type, Flags, Size, GroupName, UniqueID); getStreamer().SwitchSection(ELFSection, Subsection); if (getContext().getGenDwarfForAssembly()) { diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index da38682..3cd8453 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -24,7 +24,7 @@ MCSectionELF::~MCSectionELF() {} // anchor. bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { - if (Unique) + if (isUnique()) return false; // FIXME: Does .section .bss/.data/.text work everywhere?? @@ -148,8 +148,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",comdat"; } - if (Unique) - OS << ",unique"; + if (isUnique()) + OS << ",unique," << UniqueID; OS << '\n'; diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index ca3894b..daba321 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -93,9 +93,10 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { const SubtargetInfoKV *Found = std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU); if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) { - errs() << "'" << CPU - << "' is not a recognized processor for this target" - << " (ignoring processor)\n"; + if (CPU != "help") // Don't error if the user asked for help. + errs() << "'" << CPU + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; return MCSchedModel::GetDefaultSchedModel(); } assert(Found->Value && "Missing processor SchedModel value"); diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 2416525..6582574 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -55,13 +55,7 @@ void MCSymbol::setVariableValue(const MCExpr *Value) { assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); this->Value = Value; - - // Variables should always be marked as in the same "section" as the value. - const MCSection *Section = Value->FindAssociatedSection(); - if (Section) - setSection(*Section); - else - setUndefined(); + this->Section = nullptr; } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5e9e86f..837f585 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -649,38 +649,18 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, } } -void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, - const MCAsmLayout &Layout) { - for (MCSymbolData &SD : Asm.symbols()) { - if (!SD.getSymbol().isVariable()) - continue; - - // If the variable is a symbol difference (SA - SB + C) expression, - // and neither symbol is external, mark the variable as absolute.
- const MCExpr *Expr = SD.getSymbol().getVariableValue(); - MCValue Value; - if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) { - if (Value.getSymA() && Value.getSymB()) - const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute(); - } - } -} - void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { computeSectionAddresses(Asm, Layout); // Create symbol data for any indirect symbols. BindIndirectSymbols(Asm); - - // Mark symbol difference expressions in variables (from .set or = directives) - // as absolute. - markAbsoluteVariableSymbols(Asm, Layout); } bool MachObjectWriter:: IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const { @@ -1027,7 +1007,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, } MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, - raw_ostream &OS, + raw_pwrite_stream &OS, bool IsLittleEndian) { return new MachObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index ec6c9cb..b600baf 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -81,11 +81,12 @@ static std::string Join(const std::vector<std::string> &V) { } /// Adding features. -void SubtargetFeatures::AddFeature(StringRef String) { - // Don't add empty features or features we already have. +void SubtargetFeatures::AddFeature(StringRef String, bool Enable) { + // Don't add empty features. if (!String.empty()) // Convert to lowercase, prepend flag if we don't already have a flag. - Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower()); + Features.push_back(hasFlag(String) ? String.lower() + : (Enable ? "+" : "-") + String.lower()); } /// Find KV in array using binary search. diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c6bc81d..38bb883 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -126,8 +126,8 @@ public: bool UseBigObj; - WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS); - + WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_pwrite_stream &OS); + void reset() override { memset(&Header, 0, sizeof(Header)); Header.Machine = TargetObjectWriter->getMachine(); @@ -172,6 +172,7 @@ public: bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const override; @@ -257,7 +258,7 @@ size_t COFFSection::size() { // WinCOFFObjectWriter class implementation WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_ostream &OS) + raw_pwrite_stream &OS) : MCObjectWriter(OS, true), TargetObjectWriter(MOTW) { memset(&Header, 0, sizeof(Header)); @@ -382,9 +383,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol()); } else { - std::string WeakName = std::string(".weak.") - + Symbol.getName().str() - + ".default"; + std::string WeakName = (".weak." 
+ Symbol.getName() + ".default").str(); COFFSymbol *WeakDefault = createSymbol(WeakName); WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL; @@ -651,16 +650,17 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, } bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( - const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB, - bool InSet, bool IsPCRel) const { + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { // MS LINK expects to be able to replace all references to a function with a // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize // away any relocations to functions. if ((((DataA.getFlags() & COFF::SF_TypeMask) >> COFF::SF_TypeShift) >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; - return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, FB, - InSet, IsPCRel); + return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, DataB, FB, InSet, IsPCRel); } bool WinCOFFObjectWriter::isWeak(const MCSymbolData &SD) const { @@ -1073,9 +1073,8 @@ void MCWinCOFFObjectTargetWriter::anchor() {} //------------------------------------------------------------------------------ // WinCOFFObjectWriter factory function -namespace llvm { - MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_ostream &OS) { - return new WinCOFFObjectWriter(MOTW, OS); - } +MCObjectWriter * +llvm::createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, + raw_pwrite_stream &OS) { + return new WinCOFFObjectWriter(MOTW, OS); } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index f902d2b..8f9aacb 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -39,7 +39,7 @@ using namespace llvm; namespace llvm { MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &CE, raw_ostream &OS) + MCCodeEmitter &CE, raw_pwrite_stream &OS) : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {} void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index ad278a4..4c38b8f 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -262,7 +262,7 @@ std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, } const section_iterator SecEnd = section_end(); uint64_t AfterAddr = UnknownAddressOrSize; - for (const symbol_iterator &SymbI : symbols()) { + for (const symbol_iterator SymbI : symbols()) { section_iterator SecI = SecEnd; if (std::error_code EC = SymbI->getSection(SecI)) return EC; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4a1c311..7129aa3 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1537,7 +1537,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const { if (Stack.size() != Other.Stack.size()) return false; // Not equal if different cumulative strings. - if (!CumulativeString.str().equals(Other.CumulativeString.str())) + if (!CumulativeString.equals(Other.CumulativeString)) return false; // Equal if all nodes in both stacks match. 
for (unsigned i=0; i < Stack.size(); ++i) { @@ -1559,7 +1559,7 @@ uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { } StringRef ExportEntry::name() const { - return CumulativeString.str(); + return CumulativeString; } uint64_t ExportEntry::flags() const { diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 4bc8f92..b771a18 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -395,7 +395,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { - unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); + unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str()); SynthesizedArgs.push_back(make_unique<Arg>( Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 2533fa0..228f75e 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -162,7 +162,7 @@ APInt& APInt::operator=(uint64_t RHS) { return clearUnusedBits(); } -/// Profile - This method 'profiles' an APInt for use with FoldingSet. +/// This method 'profiles' an APInt for use with FoldingSet. void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); @@ -176,7 +176,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(pVal[i]); } -/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// This function adds a single "digit" integer, y, to the multiple /// "digit" integer array, x[]. x[] is modified to reflect the addition and /// 1 is returned if there is a carry out, otherwise 0 is returned. /// @returns the carry of the addition. @@ -202,7 +202,7 @@ APInt& APInt::operator++() { return clearUnusedBits(); } -/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from +/// This function subtracts a single "digit" (64-bit word), y, from /// the multi-digit integer array, x[], propagating the borrowed 1 value until /// no further borrowing is neeeded or it runs out of "digits" in x. The result /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. @@ -231,7 +231,7 @@ APInt& APInt::operator--() { return clearUnusedBits(); } -/// add - This function adds the integer array x to the integer array Y and +/// This function adds the integer array x to the integer array Y and /// places the result in dest. /// @returns the carry out from the addition /// @brief General addition of 64-bit integer arrays @@ -680,12 +680,12 @@ bool APInt::isSplat(unsigned SplatSizeInBits) const { return *this == rotl(SplatSizeInBits); } -/// HiBits - This function returns the high "numBits" bits of this APInt. +/// This function returns the high "numBits" bits of this APInt. APInt APInt::getHiBits(unsigned numBits) const { return APIntOps::lshr(*this, BitWidth - numBits); } -/// LoBits - This function returns the low "numBits" bits of this APInt. +/// This function returns the low "numBits" bits of this APInt. APInt APInt::getLoBits(unsigned numBits) const { return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), BitWidth - numBits); @@ -861,7 +861,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { return isNeg ? -Tmp : Tmp; } -/// RoundToDouble - This function converts this APInt to a double. +/// This function converts this APInt to a double. 
/// The layout for double is as following (IEEE Standard 754): /// -------------------------------------- /// | Sign Exponent Fraction Bias | @@ -2269,9 +2269,8 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, std::reverse(Str.begin()+StartDig, Str.end()); } -/// toString - This returns the APInt as a std::string. Note that this is an -/// inefficient method. It is better to pass in a SmallVector/SmallString -/// to the methods above. +/// Returns the APInt as a std::string. Note that this is an inefficient method. +/// It is better to pass in a SmallVector/SmallString to the methods above. std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { SmallString<40> S; toString(S, Radix, Signed, /* formatAsCLiteral = */false); diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index af6c605..3cabc54 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -313,7 +313,7 @@ static Option *LookupNearestOption(StringRef Arg, if (RHS.empty() || !PermitValue) NearestString = OptionNames[i]; else - NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); + NearestString = (Twine(OptionNames[i]) + "=" + RHS).str(); } } } @@ -784,7 +784,7 @@ class StrDupSaver : public StringSaver { std::vector<char *> Dups; public: - ~StrDupSaver() { + ~StrDupSaver() override { for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); I != E; ++I) { char *Dup = *I; diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp index a44b958..c243155 100644 --- a/lib/Support/DataStream.cpp +++ b/lib/Support/DataStream.cpp @@ -54,9 +54,7 @@ class DataFileStreamer : public DataStreamer { int Fd; public: DataFileStreamer() : Fd(0) {} - virtual ~DataFileStreamer() { - close(Fd); - } + ~DataFileStreamer() override { close(Fd); } size_t GetBytes(unsigned char *buf, size_t len) override { NumStreamFetches++; return read(Fd, buf, len); diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index a88b18e..eb99242 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -114,9 +114,9 @@ static void debug_user_sig_handler(void *Cookie) { // know that debug mode is enabled and dbgs() really is a // circular_raw_ostream. If NDEBUG is defined, then dbgs() == // errs() but this will never be invoked. - llvm::circular_raw_ostream *dbgout = - static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs()); - dbgout->flushBufferWithBanner(); + llvm::circular_raw_ostream &dbgout = + static_cast<circular_raw_ostream &>(llvm::dbgs()); + dbgout.flushBufferWithBanner(); } /// dbgs - Return a circular-buffered debug stream. diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 80d2aef..b8538ff 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -51,8 +51,8 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { /// void FoldingSetNodeID::AddPointer(const void *Ptr) { // Note: this adds pointers to the hash using sizes and endianness that - // depend on the host. It doesn't matter however, because hashing on - // pointer values in inherently unstable. Nothing should depend on the + // depend on the host. It doesn't matter, however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the // ordering of nodes in the folding set. 
Bits.append(reinterpret_cast<unsigned *>(&Ptr), reinterpret_cast<unsigned *>(&Ptr+1)); diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index fd4ce54..97aedc8 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -92,7 +92,7 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args, errs() << " done. \n"; } else { sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg); - errs() << "Remember to erase graph file: " << Filename.str() << "\n"; + errs() << "Remember to erase graph file: " << Filename << "\n"; } return false; } @@ -140,6 +140,29 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, std::string ViewerPath; GraphSession S; +#ifdef __APPLE__ + if (S.TryFindProgram("open", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + if (wait) + args.push_back("-W"); + args.push_back(Filename.c_str()); + args.push_back(nullptr); + errs() << "Trying 'open' program... "; + if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg)) + return false; + } +#endif + if (S.TryFindProgram("xdg-open", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + args.push_back(Filename.c_str()); + args.push_back(nullptr); + errs() << "Trying 'xdg-open' program... "; + if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg)) + return false; + } + // Graphviz if (S.TryFindProgram("Graphviz", ViewerPath)) { std::vector<const char *> args; diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 0e9a62e..726961a 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -182,19 +182,21 @@ static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, #endif } -static bool OSHasAVXSupport() { +static bool GetX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) // Check xgetbv; this uses a .byte sequence instead of the instruction // directly because older assemblers do not include support for xgetbv and // there is no easy way to conditionally compile based on the assembler used. - int rEAX, rEDX; - __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (*rEAX), "=d" (*rEDX) : "c" (0)); + return false; #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + *rEAX = Result; + *rEDX = Result >> 32; + return false; #else - int rEAX = 0; // Ensures we return false + return true; #endif - return (rEAX & 6) == 6; } static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, @@ -223,19 +225,30 @@ StringRef sys::getHostCPUName() { char c[12]; } text; - GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); - - unsigned MaxLeaf = EAX; - bool HasSSE3 = (ECX & 0x1); - bool HasSSE41 = (ECX & 0x80000); - // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + unsigned MaxLeaf; + GetX86CpuIDAndInfo(0, &MaxLeaf, text.u+0, text.u+2, text.u+1); + + bool HasMMX = (EDX >> 23) & 1; + bool HasSSE = (EDX >> 25) & 1; + bool HasSSE2 = (EDX >> 26) & 1; + bool HasSSE3 = (ECX >> 0) & 1; + bool HasSSSE3 = (ECX >> 9) & 1; + bool HasSSE41 = (ECX >> 19) & 1; + bool HasSSE42 = (ECX >> 20) & 1; + bool HasMOVBE = (ECX >> 22) & 1; + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. 
const unsigned AVXBits = (1 << 27) | (1 << 28); - bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport(); - bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 && - !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) && - (EBX & 0x20); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !GetX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + bool HasLeaf7 = MaxLeaf >= 0x7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); + bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); + bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); + GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; bool HasTBM = (ECX >> 21) & 0x1; @@ -298,6 +311,8 @@ StringRef sys::getHostCPUName() { case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. case 13: // Intel Pentium M processor, Intel Celeron M processor, model // 0Dh. All processors are manufactured using the 90 nm process. + case 21: // Intel EP80579 Integrated Processor and Intel EP80579 + // Integrated Processor with Intel QuickAssist Technology return "pentium-m"; case 14: // Intel Core Duo processor, Intel Core Solo processor, model @@ -313,74 +328,85 @@ StringRef sys::getHostCPUName() { // manufactured using the 65 nm process return "core2"; - case 21: // Intel EP80579 Integrated Processor and Intel EP80579 - // Integrated Processor with Intel QuickAssist Technology - return "i686"; // FIXME: ??? - case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) - // Not all Penryn processors support SSE 4.1 (such as the Pentium brand) - return HasSSE41 ? "penryn" : "core2"; + case 29: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + return "penryn"; case 26: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. - case 29: // Intel Xeon processor MP. All processors are manufactured using - // the 45 nm process. case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. + case 46: // Nehalem EX + return "nehalem"; case 37: // Intel Core i7, laptop version. case 44: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. - case 46: // Nehalem EX case 47: // Westmere EX - return "corei7"; + return "westmere"; // SandyBridge: case 42: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 45: - // Not all Sandy Bridge processors support AVX (such as the Pentium - // versions instead of the i7 versions). - return HasAVX ? "corei7-avx" : "corei7"; + return "sandybridge"; // Ivy Bridge: case 58: case 62: // Ivy Bridge EP - // Not all Ivy Bridge processors support AVX (such as the Pentium - // versions instead of the i7 versions). - return HasAVX ? "core-avx-i" : "corei7"; + return "ivybridge"; // Haswell: case 60: case 63: case 69: case 70: - // Not all Haswell processors support AVX2 (such as the Pentium - // versions instead of the i7 versions). - return HasAVX2 ? "core-avx2" : "corei7"; + return "haswell"; // Broadwell: case 61: - // Not all Broadwell processors support AVX2 (such as the Pentium - // versions instead of the i7 versions). - return HasAVX2 ? 
"broadwell" : "corei7"; + return "broadwell"; case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield case 53: // 32 nm Atom Midview case 54: // 32 nm Atom Midview - return "atom"; + return "bonnell"; // Atom Silvermont codes from the Intel software optimization guide. case 55: case 74: case 77: - return "slm"; - - default: return (Em64T) ? "x86-64" : "i686"; + return "silvermont"; + + default: // Unknown family 6 CPU, try to guess. + if (HasAVX512) + return "knl"; + if (HasADX) + return "broadwell"; + if (HasAVX2) + return "haswell"; + if (HasAVX) + return "sandybridge"; + if (HasSSE42) + return HasMOVBE ? "silvermont" : "nehalem"; + if (HasSSE41) + return "penryn"; + if (HasSSSE3) + return HasMOVBE ? "bonnell" : "core2"; + if (Em64T) + return "x86-64"; + if (HasSSE2) + return "pentium-m"; + if (HasSSE) + return "pentium3"; + if (HasMMX) + return "pentium2"; + return "pentiumpro"; } case 15: { switch (Model) { @@ -681,7 +707,89 @@ StringRef sys::getHostCPUName() { } #endif -#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + unsigned MaxLevel; + union { + unsigned u[3]; + char c[12]; + } text; + + if (GetX86CpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) || + MaxLevel < 1) + return false; + + GetX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + + Features["cmov"] = (EDX >> 15) & 1; + Features["mmx"] = (EDX >> 23) & 1; + Features["sse"] = (EDX >> 25) & 1; + Features["sse2"] = (EDX >> 26) & 1; + Features["sse3"] = (ECX >> 0) & 1; + Features["ssse3"] = (ECX >> 9) & 1; + Features["sse4.1"] = (ECX >> 19) & 1; + Features["sse4.2"] = (ECX >> 20) & 1; + + Features["pclmul"] = (ECX >> 1) & 1; + Features["cx16"] = (ECX >> 13) & 1; + Features["movbe"] = (ECX >> 22) & 1; + Features["popcnt"] = (ECX >> 23) & 1; + Features["aes"] = (ECX >> 25) & 1; + Features["rdrnd"] = (ECX >> 30) & 1; + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && + !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + Features["avx"] = HasAVX; + Features["fma"] = HasAVX && (ECX >> 12) & 1; + Features["f16c"] = HasAVX && (ECX >> 29) & 1; + + // AVX512 requires additional context to be saved by the OS. + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + unsigned MaxExtLevel; + GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); + Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); + Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); + Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1); + Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1); + Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); + + bool HasLeaf7 = MaxLevel >= 7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + // AVX2 is only supported if we have the OS save support from AVX. 
+ Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1; + + Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); + Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); + Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1); + Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); + Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); + Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); + Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); + Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); + + // AVX512 is only supported if the OS supports the context save for it. + Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; + Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; + Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; + Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; + Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; + Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; + Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + + return true; +} +#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap<bool> &Features) { // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line // in all cases. diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index d0c1748..3571cd3 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -26,24 +26,6 @@ using namespace sys; //=== independent code. //===----------------------------------------------------------------------===// -/// \brief A helper function to compute the elapsed wall-time since the program -/// started. -/// -/// Note that this routine actually computes the elapsed wall time since the -/// first time it was called. However, we arrange to have it called during the -/// startup of the process to get approximately correct results. -static TimeValue getElapsedWallTime() { - static TimeValue &StartTime = *new TimeValue(TimeValue::now()); - return TimeValue::now() - StartTime; -} - -/// \brief A special global variable to ensure we call \c getElapsedWallTime -/// during global initialization of the program. -/// -/// Note that this variable is never referenced elsewhere. Doing so could -/// create race conditions during program startup or shutdown. 
-static volatile TimeValue DummyTimeValue = getElapsedWallTime(); - Optional<std::string> Process::FindInEnvPath(const std::string& EnvName, const std::string& FileName) { diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index d3e29ac..e8344ef 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -15,6 +15,7 @@ #include "regex_impl.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include <string> using namespace llvm; @@ -158,7 +159,7 @@ std::string Regex::sub(StringRef Repl, StringRef String, RefValue < Matches.size()) Res += Matches[RefValue]; else if (Error && Error->empty()) - *Error = "invalid backreference string '" + Ref.str() + "'"; + *Error = ("invalid backreference string '" + Twine(Ref) + "'").str(); break; } } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d4b150a..5b43ecc 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -92,7 +92,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case sparcv9: case sparc: return "sparc"; - case systemz: return "systemz"; + case systemz: return "s390"; case x86: case x86_64: return "x86"; @@ -1111,7 +1111,7 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { .Cases("v7m", "v7-m", "cortex-m3") .Cases("v7em", "v7e-m", "cortex-m4") .Cases("v8", "v8a", "v8-a", "cortex-a53") - .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a") + .Cases("v8.1a", "v8.1-a", "generic") .Default(nullptr); else result = llvm::StringSwitch<const char *>(MArch) diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index a9b48e0..057bcab1 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -486,12 +486,12 @@ void llvm::sys::DisableSystemDialogsOnCrash() {} /// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. -void llvm::sys::PrintStackTraceOnErrorSignal() { +void llvm::sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) { AddSignalHandler(PrintStackTraceSignalHandler, nullptr); #if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) // Environment variable to disable any kind of crash dialog. - if (getenv("LLVM_DISABLE_CRASH_REPORT")) { + if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) { mach_port_t self = mach_task_self(); exception_mask_t mask = EXC_MASK_CRASH; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index d558ff5..b5523aa 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -261,6 +261,7 @@ std::error_code rename(const Twine &from, const Twine &to) { MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) return std::error_code(); DWORD LastError = ::GetLastError(); + ec = windows_error(LastError); if (LastError != ERROR_ACCESS_DENIED) break; // Retry MoveFile() at ACCESS_DENIED. diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index de6bf1c..f070111 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -389,7 +389,7 @@ void sys::DisableSystemDialogsOnCrash() { /// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. 
-void sys::PrintStackTraceOnErrorSignal() { +void sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) { DisableSystemDialogsOnCrash(); RegisterHandler(); LeaveCriticalSection(&CriticalSection); diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc index 0223ab4..b90b4f1 100644 --- a/lib/Support/Windows/TimeValue.inc +++ b/lib/Support/Windows/TimeValue.inc @@ -47,6 +47,7 @@ std::string TimeValue::str() const { __time64_t OurTime = this->toEpochTime(); int Error = ::_localtime64_s(&Storage, &OurTime); assert(!Error); + (void)Error; LT = &Storage; #endif diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 051e2dd..6f9f910 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -487,51 +487,53 @@ void format_object_base::home() { // raw_fd_ostream //===----------------------------------------------------------------------===// -raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, - sys::fs::OpenFlags Flags) - : Error(false), UseAtomicWrites(false), pos(0) { - EC = std::error_code(); +static int getFD(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) { // Handle "-" as stdout. Note that when we do this, we consider ourself // the owner of stdout. This means that we can do things like close the // file descriptor when we're done and set the "binary" flag globally. if (Filename == "-") { - FD = STDOUT_FILENO; + EC = std::error_code(); // If user requested binary then put stdout into binary mode if // possible. if (!(Flags & sys::fs::F_Text)) sys::ChangeStdoutToBinary(); - // Close stdout when we're done, to detect any output errors. - ShouldClose = true; - return; + return STDOUT_FILENO; } + int FD; EC = sys::fs::openFileForWrite(Filename, FD, Flags); + if (EC) + return -1; - if (EC) { - ShouldClose = false; - return; - } - - // Ok, we successfully opened the file, so it'll need to be closed. - ShouldClose = true; + return FD; } -/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If -/// ShouldClose is true, this closes the file when the stream is destroyed. +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) + : raw_fd_ostream(getFD(Filename, EC, Flags), true) {} + +/// FD is the file descriptor that this writes to. If ShouldClose is true, this +/// closes the file when the stream is destroyed. raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) - : raw_ostream(unbuffered), FD(fd), - ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) { -#ifdef O_BINARY - // Setting STDOUT to binary mode is necessary in Win32 - // to avoid undesirable linefeed conversion. - // Don't touch STDERR, or w*printf() (in assert()) would barf wide chars. - if (fd == STDOUT_FILENO) - setmode(fd, O_BINARY); -#endif + : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose), + Error(false), UseAtomicWrites(false) { + if (FD < 0 ) { + ShouldClose = false; + return; + } // Get the starting position. off_t loc = ::lseek(FD, 0, SEEK_CUR); - if (loc == (off_t)-1) +#ifdef LLVM_ON_WIN32 + // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes. 
+ sys::fs::file_status Status; + std::error_code EC = status(FD, Status); + SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file; +#else + SupportsSeeking = loc != (off_t)-1; +#endif + if (!SupportsSeeking) pos = 0; else pos = static_cast<uint64_t>(loc); @@ -623,11 +625,18 @@ void raw_fd_ostream::close() { uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); pos = ::lseek(FD, off, SEEK_SET); - if (pos != off) + if (pos == (uint64_t)-1) error_detected(); return pos; } +void raw_fd_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) { + uint64_t Pos = tell(); + seek(Offset); + write(Ptr, Size); + seek(Pos); +} + size_t raw_fd_ostream::preferred_buffer_size() const { #if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) // Windows and Minix have no st_blksize. @@ -708,7 +717,9 @@ raw_ostream &llvm::outs() { // Set buffer settings to model stdout behavior. // Delete the file descriptor when the program exits, forcing error // detection. If you don't want this behavior, don't use outs(). - static raw_fd_ostream S(STDOUT_FILENO, true); + std::error_code EC; + static raw_fd_ostream S("-", EC, sys::fs::F_None); + assert(!EC); return S; } @@ -749,7 +760,14 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { // capacity. This allows raw_ostream to write directly into the correct place, // and we only need to set the vector size when the data is flushed. +raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O, unsigned) + : OS(O) {} + raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) { + init(); +} + +void raw_svector_ostream::init() { // Set up the initial external buffer. We make sure that the buffer has at // least 128 bytes free; raw_ostream itself only requires 64, but we want to // make sure that we don't grow the buffer unnecessarily on destruction (when @@ -763,6 +781,17 @@ raw_svector_ostream::~raw_svector_ostream() { flush(); } +void raw_svector_ostream::pwrite(const char *Ptr, size_t Size, + uint64_t Offset) { + flush(); + + uint64_t End = Offset + Size; + if (End > OS.size()) + OS.resize(End); + + memcpy(OS.begin() + Offset, Ptr, Size); +} + /// resync - This is called when the SmallVector we're appending to is changed /// outside of the raw_svector_ostream's control. It is only safe to do this /// if the raw_svector_ostream has previously been flushed. @@ -817,3 +846,5 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { uint64_t raw_null_ostream::current_pos() const { return 0; } + +void raw_null_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) {} diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 4ae9903..8a8f0ee 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -2040,7 +2040,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const { /// to CurRec's name. 
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name, const std::string &Scoper) { - RecTy *Type = dyn_cast<TypedInit>(Name)->getType(); + RecTy *Type = cast<TypedInit>(Name)->getType(); BinOpInit *NewName = BinOpInit::get(BinOpInit::STRCONCAT, diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h index 1f750fc..cbc30be 100644 --- a/lib/TableGen/TGLexer.h +++ b/lib/TableGen/TGLexer.h @@ -87,8 +87,7 @@ private: public: TGLexer(SourceMgr &SrcMgr); - ~TGLexer() {} - + tgtok::TokKind Lex() { return CurCode = LexToken(); } diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index bb3db4b..9a7d6c8 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,9 +32,6 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", - "Enable ARMv8.1a extensions", [FeatureCRC]>; - /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -44,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; //===----------------------------------------------------------------------===// +// Architectures. +// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC]>; + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -92,10 +96,6 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON, FeatureCRC]>; -def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, - FeatureNEON, - FeatureCrypto]>; - def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. @@ -123,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant { // AsmWriter bits get associated with the correct class. def GenericAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; int Variant = 0; bit isMCAsmWriter = 1; } def AppleAsmWriter : AsmWriter { let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; int Variant = 1; int isMCAsmWriter = 1; } diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 1b4483a..0821cff 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -131,29 +131,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); SM.serializeToStackMapSection(); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. 
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } - } MachineLocation @@ -371,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, assert(NOps == 4); OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); + OS << cast<MDLocalVariable>(MI->getOperand(NOps - 2).getMetadata()) + ->getName(); OS << " <- "; // Frame address. Currently handles register +- offset only. assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 568f258..efdb2e3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -328,7 +328,7 @@ static void initReachingDef(const MachineFunction &MF, const uint32_t *PreservedRegs = MO.getRegMask(); // Set generated regs. - for (const auto Entry : RegToId) { + for (const auto &Entry : RegToId) { unsigned Reg = Entry.second; // Use the global register ID when querying APIs external to this // pass. diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 41b1132..c2470f7 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandMOVImm(MBB, MBBI, 32); case AArch64::MOVi64imm: return expandMOVImm(MBB, MBBI, 64); - case AArch64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) - .addReg(AArch64::LR); + case AArch64::RET_ReallyLR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) + .addReg(AArch64::LR); + transferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } + } return false; } diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 99cb641..c3f6859 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1917,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // could select it. Emit a copy to subreg if necessary. FastISel will remove // it when it selects the integer extend. unsigned Reg = lookUpRegForValue(IntExtVal); - if (!Reg) { + auto *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) { if (RetVT == MVT::i64 && VT <= MVT::i32) { if (WantZExt) { // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). @@ -1935,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // The integer extend has already been emitted - delete all the instructions // that have been emitted by the integer extend lowering code and use the // result from the load instruction directly. 
- while (Reg) { - auto *MI = MRI.getUniqueVRegDef(Reg); - if (!MI) - break; + while (MI) { Reg = 0; for (auto &Opnd : MI->uses()) { if (Opnd.isReg()) { @@ -1947,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { } } MI->eraseFromParent(); + MI = nullptr; + if (Reg) + MI = MRI.getUniqueVRegDef(Reg); } updateValueMap(IntExtVal, ResultReg); return true; @@ -3034,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); + + // TODO: Handle big-endian results + if (CopyVT.isVector() && !Subtarget->isLittleEndian()) + return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 84bf317..bd2af16 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -9,6 +9,82 @@ // // This file contains the AArch64 implementation of TargetFrameLowering class. // +// On AArch64, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. +// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| <- sp +// | | Lower address +// +// +// After the prologue has run, the frame has the following general structure. +// Note that this doesn't depict the case where a red-zone is used. Also, +// technically the last frame area (VLAs) doesn't get created until in the +// main function body, after the prologue is run. However, it's depicted here +// for completeness. +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| +// | | +// | prev_fp, prev_lr | +// | (a.k.a. "frame record") | +// |-----------------------------------| <- fp(=x29) +// | | +// | other callee-saved registers | +// | | +// |-----------------------------------| +// |.empty.space.to.make.part.below....| +// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at +// |.the.standard.16-byte.alignment....| compile time; if present) +// |-----------------------------------| +// | | +// | local variables of fixed size | +// | including spill slots | +// |-----------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables....| LLVM chooses X19) +// |.(VLAs)............................| (size of this area is unknown at +// |...................................| compile time) +// |-----------------------------------| <- sp +// | | Lower address +// +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. 
For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitly needed when there are both VLAs and local +// variables with more-than-default alignment requirements. +// * A frame pointer is definitly needed when there are local variables with +// more-than-default alignment requirements. +// +// In some cases when a base pointer is not strictly needed, it is generated +// anyway when offsets from the frame pointer to access local variables become +// so large that the offset can't be encoded in the immediate fields of loads +// or stores. +// +// FIXME: also explain the redzone concept. +// FIXME: also explain the concept of reserved call frames. +// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" @@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on AArch64!"); -#endif - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint()); + MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + + const unsigned Alignment = MFI->getMaxAlignment(); + const bool NeedsRealignment = (Alignment > 16); + unsigned scratchSPReg = AArch64::SP; + if (NeedsRealignment) { + // Use the first callee-saved register as a scratch register + assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && + "No scratch register to align SP!"); + scratchSPReg = AArch64::X9; + } + + // If we're a leaf function, try using the red zone. + if (NumBytes && !canUseRedZone(MF)) + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. 
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, + MachineInstr::FrameSetup); + + assert(!(NeedsRealignment && NumBytes==0) && + "NumBytes should never be 0 when realignment is needed"); + + if (NumBytes && NeedsRealignment) { + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(NrBitsToZero > 1); + assert(scratchSPReg != AArch64::SP); + + // SUB X9, SP, NumBytes + // -- X9 is temporary register, so shouldn't contain any live data here, + // -- free to use. This is already produced by emitFrameOffset above. + // AND SP, X9, 0b11111...0000 + // The logical immediates have a non-trivial encoding. The following + // formula computes the encoded immediate with all ones but + // NrBitsToZero zero bits as least significant bits. + uint32_t andMaskEncoded = + (1 <<12) // = N + | ((64-NrBitsToZero) << 6) // immr + | ((64-NrBitsToZero-1) << 0) // imms + ; + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(scratchSPReg, RegState::Kill) + .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the @@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + if (RegInfo->hasBasePointer(MF)) { + TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, + false); + } if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // An example of the prologue: // // .globl __foo @@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consitency with the prologue. Note that + // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { @@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. + // there are VLAs or a dynamically realigned SP (and thus the SP isn't + // reliable as a base). Make sure useFPForScavengingIndex() does the + // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than @@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && + !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. 
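The AND-mask computation in the prologue hunk above packs an AArch64 logical immediate by hand: N = 1 selects a 64-bit element, immr is the rotate-right amount, and imms encodes the number of set bits minus one. As a sanity check on the formula, here is a minimal standalone C++ sketch (illustrative only, not LLVM code; the helper names are invented) that encodes the immediate for a given alignment and decodes it back to the mask the AND applies to SP:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Encode the mask for "AND SP, Xn, #imm" exactly as the hunk above does:
    // N = 1 (64-bit element), immr = 64 - k, imms = 64 - k - 1, where k is
    // the number of low bits to clear (countTrailingZeros(Alignment)).
    static uint32_t encodeAlignMask(unsigned NrBitsToZero) {
      assert(NrBitsToZero > 1 && NrBitsToZero < 64);
      return (1u << 12)                    // N
             | ((64 - NrBitsToZero) << 6)  // immr
             | (64 - NrBitsToZero - 1);    // imms
    }

    // Decode it back: (imms + 1) consecutive ones, rotated right by immr.
    static uint64_t decodeAlignMask(uint32_t Enc) {
      unsigned Immr = (Enc >> 6) & 0x3f;
      unsigned Imms = Enc & 0x3f;
      uint64_t Ones = ~0ULL >> (63 - Imms);
      return Immr ? (Ones >> Immr) | (Ones << (64 - Immr)) : Ones;
    }

    int main() {
      // 32-byte stack alignment: countTrailingZeros(32) == 5.
      uint32_t Enc = encodeAlignMask(5);
      printf("encoded 0x%x -> mask 0x%016llx\n", Enc,
             (unsigned long long)decodeAlignMask(Enc));
      // Prints mask 0xffffffffffffffe0, i.e. SP & ~(32 - 1).
      return 0;
    }

Decoding confirms the claim in the comment: all bits set except the NrBitsToZero least significant ones, so the AND rounds SP down to the requested alignment.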
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } + assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + "In the presence of dynamic stack pointer realignment, " + "non-argument objects cannot be accessed through the frame pointer"); + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -695,6 +787,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) @@ -794,6 +888,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) + MRI->setPhysRegUsed(AArch64::X9); + // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; @@ -867,7 +964,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = + MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index df3875f..1439bf3 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/) {} + true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 0a47dcb..f75700d 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -848,7 +848,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa<ConstantSDNode>(RHS)) { - int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue(); + int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); // Skip the immediate can be seleced by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c0e856..90a5e5e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // f16 is storage-only, so we promote operations to f32 if we know this is - // valid, and ignore them otherwise. 
The operations not mentioned here will - // fail to select, but this is not a major problem as no source language - // should be emitting native f16 operations yet. - setOperationAction(ISD::FADD, MVT::f16, Promote); - setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FMUL, MVT::f16, Promote); - setOperationAction(ISD::FSUB, MVT::f16, Promote); + // f16 is a storage-only type, always promote it to f32. + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Enable TBZ/TBNZ MaskAndBranchFoldingIsLegal = true; + EnableExtLdPromotion = true; setMinFunctionAlignment(2); @@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getOperand(0).getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); + } + if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), + DAG.getIntPtrConstant(0)); + } + // i128 conversions are libcalls. 
if (Op.getOperand(0).getValueType() == MVT::i128) return SDValue(); @@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), - /*isVol = */ false, - /*AlwaysInline = */ false, DstInfo, MachinePointerInfo()); + /*isVol = */ false, /*AlwaysInline = */ false, + /*isTailCall = */ false, + DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { @@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, - SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. - if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) - return SDValue(); - - AArch64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, - CCVal, Overflow); - } - - if (CC.getOpcode() == ISD::SETCC) - return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal, - cast<CondCodeSDNode>(CC.getOperand(2))->get()); - else - return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal, - FVal, ISD::SETNE); -} - -SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, +SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, + SDValue RHS, SDValue TVal, + SDValue FVal, SDLoc dl, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue TVal = Op.getOperand(2); - SDValue FVal = Op.getOperand(3); - SDLoc dl(Op); - // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { @@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); } // Now we know we're dealing with FP values. assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); // Try to match this select into a max/min operation, which have dedicated // opcode in the instruction set. 
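The f16 hunks in this file make every fp16/integer conversion round-trip through f32, since f16 is a storage-only type on this target. In scalar C terms the promoted code behaves like the sketch below (illustrative only; _Float16 availability depends on the compiler, and a GCC/Clang extension is assumed):

    #include <cstdint>

    // f16 -> int: the operand is widened to f32 first (the inserted
    // ISD::FP_EXTEND), then the original FP_TO_SINT runs on the f32 value.
    int32_t fp16_to_i32(_Float16 X) {
      float Wide = (float)X;
      return (int32_t)Wide;
    }

    // int -> f16: the conversion happens in f32 (the original SINT_TO_FP,
    // retyped), and the result is narrowed back with ISD::FP_ROUND.
    _Float16 i32_to_fp16(int32_t X) {
      float Wide = (float)X;
      return (_Float16)Wide;
    }

This is the same promote-through-f32 pattern that the long list of setOperationAction(..., MVT::f16, Promote) calls earlier in the hunk requests for the remaining floating-point operations.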
@@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, return CS1; } +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TVal = Op.getOperand(2); + SDValue FVal = Op.getOperand(3); + SDLoc DL(Op); + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue CCVal = Op->getOperand(0); + SDValue TVal = Op->getOperand(1); + SDValue FVal = Op->getOperand(2); + SDLoc DL(Op); + + unsigned Opc = CCVal.getOpcode(); + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select + // instruction. + if (CCVal.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) + return SDValue(); + + AArch64CC::CondCode OFCC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); + SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); + + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); + } + + // Lower it the same way as we would lower a SELECT_CC node. + ISD::CondCode CC; + SDValue LHS, RHS; + if (CCVal.getOpcode() == ISD::SETCC) { + LHS = CCVal.getOperand(0); + RHS = CCVal.getOperand(1); + CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get(); + } else { + LHS = CCVal; + RHS = DAG.getConstant(0, CCVal.getValueType()); + CC = ISD::SETNE; + } + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking @@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), + 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; - else if (EltTy == MVT::i16) + else if (EltTy == MVT::i16 || EltTy == MVT::f16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; @@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT1.getSizeInBits() <= 32); } +bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { + if (isa<FPExtInst>(Ext)) + return false; + + // Vector types are next free. + if (Ext->getType()->isVectorTy()) + return false; + + for (const Use &U : Ext->uses()) { + // The extension is free if we can fold it with a left shift in an + // addressing mode or an arithmetic operation: add, sub, and cmp. + + // Is there a shift? + const Instruction *Instr = cast<Instruction>(U.getUser()); + + // Is this a constant shift? 
+ switch (Instr->getOpcode()) { + case Instruction::Shl: + if (!isa<ConstantInt>(Instr->getOperand(1))) + return false; + break; + case Instruction::GetElementPtr: { + gep_type_iterator GTI = gep_type_begin(Instr); + std::advance(GTI, U.getOperandNo()); + Type *IdxTy = *GTI; + // This extension will end up with a shift because of the scaling factor. + // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. + // Get the shift amount based on the scaling factor: + // log2(sizeof(IdxTy)) - log2(8). + uint64_t ShiftAmt = + countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + // Is the constant foldable in the shift of the addressing mode? + // I.e., shift amount is between 1 and 4 inclusive. + if (ShiftAmt == 0 || ShiftAmt > 4) + return false; + break; + } + case Instruction::Trunc: + // Check if this is a noop. + // trunc(sext ty1 to ty2) to ty1. + if (Instr->getType() == Ext->getOperand(0)->getType()) + continue; + // FALL THROUGH. + default: + return false; + } + + // At this point we can use the bfm family, so this extension is free + // for that use. + } + return true; +} + bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) @@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; - return Size >= 8 ? MVT::i64 : MVT::i32; + if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast))) + return MVT::i64; + + if (Size >= 4 && + (memOpAlign(SrcAlign, DstAlign, 4) || + (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast))) + return MVT::i32; + + return MVT::Other; } // 12-bit optionally shifted immediates are legal for adds. diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 5ff11e8..820613b 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -355,6 +355,8 @@ public: getPreferredVectorAction(EVT VT) const override; private: + bool isExtFreeImpl(const Instruction *Ext) const override; + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; @@ -418,6 +420,9 @@ private: SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, + SDValue TVal, SDValue FVal, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d295c02..0c0efaf 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic, SDPatternOperator OpNode = null_frag> { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. 
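// (Editor's note, not in the patch: AddedComplexity is added to the pattern
// complexity TableGen computes, and patterns are tried highest-first, so the
// value of 6 below lifts the Wri/Xri immediate forms ahead of the
// extended-register forms during instruction selection.)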
+ // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. + let AddedComplexity = 6 in def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, mnemonic, OpNode> { let Inst{31} = 0; } + let AddedComplexity = 6 in def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, mnemonic, OpNode> { let Inst{31} = 1; @@ -3282,6 +3288,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -5298,6 +5308,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list<dag> pattern> + : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, @@ -5325,6 +5356,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; } +multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst), + (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, []>; + def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst), + (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, []>; +} + multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -5885,7 +5926,7 @@ multiclass SIMDIns { let Inst{20-18} = idx; let Inst{17-16} = 0b10; let Inst{14-12} = idx2; - let Inst{11} = 0; + let Inst{11} = {?}; } def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { bits<2> idx; @@ -5893,7 +5934,7 @@ multiclass SIMDIns { let Inst{20-19} = idx; let Inst{18-16} = 0b100; let Inst{14-13} = idx2; - let Inst{12-11} = 0; + let Inst{12-11} = {?,?}; } def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { bits<1> idx; @@ -5901,7 +5942,7 @@ multiclass SIMDIns { let Inst{20} = idx; let Inst{19-16} = 0b1000; let Inst{14} = idx2; - let Inst{13-11} = 0; + let Inst{13-11} = {?,?,?}; } // For all forms of the INS instruction, the "mov" mnemonic is the @@ -8517,6 +8558,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> { } // end of 'let Predicates = [HasNEON]' //---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list<dag> pattern> + : 
BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind, + pattern> { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
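// (Editor's note, not part of the patch: the patterns in this multiclass
// model SQRDMLAH/SQRDMLSH as a saturating accumulate of the sqrdmulh result,
// i.e. Accum(Rd, sqrdmulh(Rn, Rm)). A scalar sketch of that composition for
// one i16 lane, with sat16() as an assumed clamp-to-[INT16_MIN, INT16_MAX]
// helper:
//   int16_t sqrdmulh16(int16_t a, int16_t b) {
//     return sat16((2 * (int32_t)a * (int32_t)b + (1 << 15)) >> 16);
//   }
//   int16_t sqrdmlah16(int16_t rd, int16_t rn, int16_t rm) {
//     return sat16((int32_t)rd + (int32_t)sqrdmulh16(rn, rm));
//   }
// This is the intrinsic-level view: the patterns here fuse the existing
// sqadd/sqsub-of-sqrdmulh pair into the single v8.1a instruction.)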
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast<Instruction>(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + +//---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8e0af2d..db231c4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple( } for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); @@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI); @@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, 
MBBI, DL, get(Opc)) .addReg(DestReg, getDefRegState(true)) .addFrameIndex(FI); if (Offset) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ec6fa5c..f7db50a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, @@ -22,8 +24,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -2314,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// @@ -2778,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", @@ -2994,6 +3012,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl> defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasV8_1a] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -3478,13 +3510,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), // AdvSIMD INS/DUP instructions //---------------------------------------------------------------------------- -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; def DUPv2i64lane : SIMDDup64FromElement; def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; @@ -4324,6 +4356,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : 
SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 33c11fe..1836682 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // large enough that referencing from the FP won't result in things being // in range relatively often, we can use a base pointer to allow access // from the other direction like the SP normally works. + // Furthermore, if both variable sized objects are present, and the + // stack needs to be dynamically re-aligned, the base pointer is the only + // reliable way to reference the locals. if (MFI->hasVarSizedObjects()) { + if (needsStackRealignment(MF)) + return true; // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl(*MF.getFunction()) + ->getFrameLowering() + ->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index c01bfa5..8c379d9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -93,6 +93,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 3ec4157..ca4457a 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td" // Cortex-A57. The Cortex-A57 types are directly associated with resources, so // defining the aliases precludes the need for mapping them using WriteRes. The // aliases are sufficient for creating a coarse, working model. As the model -// evolves, InstRWs will be used to override these SchedAliases. +// evolves, InstRWs will be used to override some of these SchedAliases. +// +// WARNING: Using SchedAliases is convenient and works well for latency and +// resource lookup for instructions. 
However, this creates an entry in +// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking +// any SchedReadAdvance since the lookup will fail. def : SchedAlias<WriteImm, A57Write_1cyc_1I>; def : SchedAlias<WriteI, A57Write_1cyc_1I>; @@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; def : SchedAlias<WriteIS, A57Write_1cyc_1I>; def : SchedAlias<WriteID32, A57Write_19cyc_1M>; def : SchedAlias<WriteID64, A57Write_35cyc_1M>; -def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; -def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; } +def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; } def : SchedAlias<WriteBr, A57Write_1cyc_1B>; def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; def : SchedAlias<WriteLD, A57Write_4cyc_1L>; @@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar<RegShiftedPred, [WriteISReg]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + // Divide and Multiply Instructions // ----------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 221d70d..0b97af8 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -47,8 +47,9 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index bcab97d..5454b20 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -37,11 +37,12 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; + bool HasV8_1aOps; + bool HasFPARMv8; bool HasNEON; bool HasCrypto; bool HasCRC; - bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove; @@ -93,6 +94,8 @@ public: return isCortexA53() || isCortexA57(); } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } @@ -101,7 +104,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index f902f64..ab28a16 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -87,6 +87,11 @@ EnableGEPOpt("aarch64-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("aarch64-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); @@ -245,7 +250,9 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1219ffc..063c714 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1972,7 +1972,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, S, getContext())); return MatchOperand_Success; @@ -1985,7 +1986,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - unsigned prfop = Mapper.fromString(Tok.getString(), Valid); + unsigned prfop = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; @@ -2090,15 +2092,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Real)) { APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in // order to use the zero register for it. 
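// (Editor's note, not part of the patch: the switch from isZero() to
// isPosZero() below closes a sign hole. "fmov d0, xzr" materializes the
// all-zero bit pattern, i.e. +0.0, while -0.0 is 0x8000000000000000 and is
// not encodable as an 8-bit FP immediate either, so "fmov d0, #-0.0" must
// now be rejected instead of being silently matched to the zero register as
// +0.0. The changeSign() call added above makes the parsed APFloat itself
// negative so this check can see the sign.)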
- if (Val == -1 && !RealVal.isZero()) { + if (Val == -1 && !RealVal.isPosZero()) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } @@ -2597,7 +2600,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, ExprLoc, getContext())); return MatchOperand_Success; @@ -2610,7 +2614,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - unsigned Opt = Mapper.fromString(Tok.getString(), Valid); + unsigned Opt = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2638,18 +2643,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { return MatchOperand_NoMatch; bool IsKnown; - auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits()); - uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown); + auto MRSMapper = AArch64SysReg::MRSMapper(); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MRSReg != -1U) && "register should be -1 if and only if it's unknown"); - auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits()); - uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown); + auto MSRMapper = AArch64SysReg::MSRMapper(); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); auto PStateMapper = AArch64PState::PStateMapper(); - uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); + uint32_t PStateField = + PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index fb25089..1c8c0a66 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1102,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::STLRW: case AArch64::STLRB: case AArch64::STLRH: + case AArch64::STLLRW: + case AArch64::STLLRB: + case AArch64::STLLRH: + case AArch64::LDLARW: + case AArch64::LDLARB: + case AArch64::LDLARH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXRX: @@ -1112,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::LDAXRX: case AArch64::LDXRX: case AArch64::STLRX: + case AArch64::LDLARX: + case AArch64::STLLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXPW: @@ -1504,7 +1512,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + (void)AArch64PState::PStateMapper().toString(pstate_field, + Dis->getSubtargetInfo().getFeatureBits(), ValidNamed); return 
ValidNamed ? Success : Fail; } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 46a1d79..febd332 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -34,18 +34,13 @@ using namespace llvm; AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : AArch64InstPrinter(MAI, MII, MRI, STI) {} + const MCRegisterInfo &MRI) + : AArch64InstPrinter(MAI, MII, MRI) {} void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // This is for .cfi directives. @@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { // Check for special encodings and print the canonical alias instead. unsigned Opcode = MI->getOpcode(); @@ -210,8 +206,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } @@ -614,7 +610,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { } void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); StringRef Layout, Mnemonic; @@ -624,7 +621,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); + printVectorList(MI, ListOpNum, STI, O, ""); O << ", " << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); @@ -638,7 +635,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // Now onto the operands: first a vector list with possible lane // specifier. E.g. 
{ v0 }[2] int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); + printVectorList(MI, OpNum++, STI, O, ""); if (LdStDesc->HasLane) O << '[' << MI->getOperand(OpNum++).getImm() << ']'; @@ -662,7 +659,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - AArch64InstPrinter::printInst(MI, O, Annot); + AArch64InstPrinter::printInst(MI, O, Annot, STI); } bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { @@ -889,6 +886,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { } void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { @@ -903,6 +901,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); O << format("#%#llx", Op.getImm()); @@ -922,6 +921,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isReg() && "Non-register vreg operand!"); @@ -930,6 +930,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); @@ -937,6 +938,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isImm()) { @@ -946,18 +948,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); O << '#' << Val; if (Shift != 0) - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); if (CommentStream) *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } } void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -965,6 +968,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -972,6 +976,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); // LSL #0 should not be printed. 
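[Editor's note] The pattern repeated through the rest of this printer file -- and in the AArch64NamedImmMapper changes further down, where every Mapping row gains a feature-bits column checked by isNameEqual/isValueEqual -- is the same: instead of the printer latching setAvailableFeatures(STI.getFeatureBits()) at construction, the MCSubtargetInfo is threaded into every print helper, so one printer instance can serve code built for different subtargets and feature-gated names (v8.1a system registers, barrier options) resolve per call. A self-contained toy of the before/after shape, with the feature bit position made up for illustration:

  #include <cstdint>

  struct SubtargetInfo { uint64_t FeatureBits = 0; };
  constexpr uint64_t FeatureV8_1a = 1ull << 0; // hypothetical bit position

  // Before the change (sketch): feature bits frozen at construction time.
  class OldPrinter {
    uint64_t AvailableFeatures;
  public:
    explicit OldPrinter(const SubtargetInfo &STI)
        : AvailableFeatures(STI.FeatureBits) {}
    const char *barrierName(bool &Valid) const {
      Valid = (AvailableFeatures & FeatureV8_1a) != 0;
      return Valid ? "v8.1a-only-name" : "";
    }
  };

  // After the change (sketch): the subtarget rides along on every call, so a
  // single printer instance can alternate between feature sets.
  class NewPrinter {
  public:
    const char *barrierName(const SubtargetInfo &STI, bool &Valid) const {
      Valid = (STI.FeatureBits & FeatureV8_1a) != 0;
      return Valid ? "v8.1a-only-name" : "";
    }
  };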
@@ -983,18 +988,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); + printArithExtend(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); @@ -1038,24 +1046,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(CC); } void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); } void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; } template<int Scale> void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '#' << Scale * MI->getOperand(OpNum).getImm(); } @@ -1085,10 +1097,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + StringRef Name = + AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; else @@ -1096,6 +1110,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); float FPImm = @@ -1151,6 +1166,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { } void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix) { unsigned Reg = MI->getOperand(OpNum).getReg(); @@ -1193,14 +1209,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, O << " }"; } -void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); +void +AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printVectorList(MI, OpNum, STI, O, ""); } template <unsigned NumLanes, char LaneKind> void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { std::string 
Suffix("."); if (NumLanes) @@ -1208,15 +1227,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, else Suffix += LaneKind; - printVectorList(MI, OpNum, O, Suffix); + printVectorList(MI, OpNum, STI, O, Suffix); } void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1241,6 +1262,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1256,6 +1278,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); unsigned Opcode = MI->getOpcode(); @@ -1263,9 +1286,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, Valid); + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Valid); if (Valid) O << Name; else @@ -1273,31 +1298,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MRSMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MSRMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + StringRef Name = + AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); else @@ -1305,6 +1334,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned RawVal = MI->getOperand(OpNo).getImm(); uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 5f51621..c2077a0 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ 
b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -26,16 +26,21 @@ class MCOperand; class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); + virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, + raw_ostream &O); virtual StringRef getRegName(unsigned RegNo) const { return getRegisterName(RegNo); } @@ -45,90 +50,126 @@ public: protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O); - template<int Amount> - void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + template <int Amount> + void printPostIncOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { printPostIncOperand(MI, OpNo, Amount, O); } - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void 
printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, char SrcRegKind, unsigned Width); template <char SrcRegKind, unsigned Width> - void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + void printMemExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printMemExtend(MI, OpNum, O, SrcRegKind, Width); } - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); - template<int Scale> - void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int Scale> + void printUImm12Offset(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printUImm12Offset(MI, OpNum, Scale, O); } - template<int BitWidth> - void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int BitWidth> + void printAMIndexedWB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printAMIndexedWB(MI, OpNum, BitWidth / 8, O); } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - template<int Scale> - void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <int Scale> + void printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, + void printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix); /// Print a list of vector registers where the type suffix is implicit /// (i.e. attached to the instruction rather than the registers). 
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBarrierOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSystemPStateField(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { public: AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; - void printInstruction(const MCInst *MI, raw_ostream &O) override; - bool printAliasInstr(const MCInst *MI, raw_ostream &O) override; + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O) override; StringRef getRegName(unsigned RegNo) const override { return getRegisterName(RegNo); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 84b63a0..e5eb90c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -313,7 +313,7 @@ public: DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) : AArch64AsmBackend(T), MRI(MRI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL); } @@ -461,7 +461,7 @@ public: ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter 
*createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 5ea49c3..1f516d1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - virtual ~AArch64ELFObjectWriter(); + ~AArch64ELFObjectWriter() override; protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 8f780d2..540d1fc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -89,12 +89,12 @@ class AArch64ELFStreamer : public MCELFStreamer { public: friend class AArch64TargetELFStreamer; - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~AArch64ELFStreamer() {} + ~AArch64ELFStreamer() override {} void ChangeSection(const MCSection *Section, const MCExpr *Subsection) override { @@ -211,8 +211,8 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, } MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 71b05cc..ef48203 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -19,8 +19,8 @@ namespace llvm { MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 9ea49f0..fd4dc47 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -40,7 +40,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { public: AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} - ~AArch64MCCodeEmitter() {} + ~AArch64MCCodeEmitter() override {} // getBinaryCodeForInstr - 
TableGen'erated function for getting the // binary encoding for an instruction. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 38b399d..afad674 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -109,29 +109,28 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, +static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); + return new AArch64InstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + return new AArch64AppleInstPrinter(MAI, MII, MRI); return nullptr; } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &TAB, raw_ostream &OS, + MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, DWARFMustBeAtTheEnd, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 7ce303b..4705bdf 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -33,6 +33,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; @@ -48,11 +49,13 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian); -MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); +MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype); MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 0d9385d..61649c4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -413,9 +413,9 @@ void AArch64MachObjectWriter::RecordRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype) { return createMachObjectWriter( new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, /*IsLittleEndian=*/true); diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp 
b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 160c1c5..8696163 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -18,9 +18,10 @@ using namespace llvm; -StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, uint64_t FeatureBits, + bool &Valid) const { for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Value == Value) { + if (Mappings[i].isValueEqual(Value, FeatureBits)) { Valid = true; return Mappings[i].Name; } @@ -30,10 +31,11 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Name == LowerCaseName) { + if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) { Valid = true; return Mappings[i].Value; } @@ -48,744 +50,776 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { } const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, + {"s1e1r", S1E1R, 0}, + {"s1e2r", S1E2R, 0}, + {"s1e3r", S1E3R, 0}, + {"s1e1w", S1E1W, 0}, + {"s1e2w", S1E2W, 0}, + {"s1e3w", S1E3W, 0}, + {"s1e0r", S1E0R, 0}, + {"s1e0w", S1E0W, 0}, + {"s12e1r", S12E1R, 0}, + {"s12e1w", S12E1W, 0}, + {"s12e0r", S12E0R, 0}, + {"s12e0w", S12E0W, 0}, }; AArch64AT::ATMapper::ATMapper() : AArch64NamedImmMapper(ATMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} + {"oshld", OSHLD, 0}, + {"oshst", OSHST, 0}, + {"osh", OSH, 0}, + {"nshld", NSHLD, 0}, + {"nshst", NSHST, 0}, + {"nsh", NSH, 0}, + {"ishld", ISHLD, 0}, + {"ishst", ISHST, 0}, + {"ish", ISH, 0}, + {"ld", LD, 0}, + {"st", ST, 0}, + {"sy", SY, 0} }; AArch64DB::DBarrierMapper::DBarrierMapper() : AArch64NamedImmMapper(DBarrierMappings, 16u) {} const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} + {"zva", ZVA, 0}, + {"ivac", IVAC, 0}, + {"isw", ISW, 0}, + {"cvac", CVAC, 0}, + {"csw", CSW, 0}, + {"cvau", CVAU, 0}, + {"civac", CIVAC, 0}, + {"cisw", CISW, 0} }; AArch64DC::DCMapper::DCMapper() : AArch64NamedImmMapper(DCMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} + {"ialluis", IALLUIS, 0}, + {"iallu", IALLU, 0}, + {"ivau", IVAU, 0} }; AArch64IC::ICMapper::ICMapper() : AArch64NamedImmMapper(ICMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { - {"sy", SY}, + {"sy", SY, 0}, }; AArch64ISB::ISBMapper::ISBMapper() : AArch64NamedImmMapper(ISBMappings, 16) {} const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { - {"pldl1keep", 
PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} + {"pldl1keep", PLDL1KEEP, 0}, + {"pldl1strm", PLDL1STRM, 0}, + {"pldl2keep", PLDL2KEEP, 0}, + {"pldl2strm", PLDL2STRM, 0}, + {"pldl3keep", PLDL3KEEP, 0}, + {"pldl3strm", PLDL3STRM, 0}, + {"plil1keep", PLIL1KEEP, 0}, + {"plil1strm", PLIL1STRM, 0}, + {"plil2keep", PLIL2KEEP, 0}, + {"plil2strm", PLIL2STRM, 0}, + {"plil3keep", PLIL3KEEP, 0}, + {"plil3strm", PLIL3STRM, 0}, + {"pstl1keep", PSTL1KEEP, 0}, + {"pstl1strm", PSTL1STRM, 0}, + {"pstl2keep", PSTL2KEEP, 0}, + {"pstl2strm", PSTL2STRM, 0}, + {"pstl3keep", PSTL3KEEP, 0}, + {"pstl3strm", PSTL3STRM, 0} }; AArch64PRFM::PRFMMapper::PRFMMapper() : AArch64NamedImmMapper(PRFMMappings, 32) {} const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} + {"spsel", SPSel, 0}, + {"daifset", DAIFSet, 0}, + {"daifclr", DAIFClr, 0}, + + // v8.1a "Privileged Access Never" extension-specific PStates + {"pan", PAN, AArch64::HasV8_1aOps}, }; AArch64PState::PStateMapper::PStateMapper() : AArch64NamedImmMapper(PStateMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, - {"id_aa64afr0_el1", ID_A64AFR0_EL1}, - {"id_aa64afr1_el1", ID_A64AFR1_EL1}, - {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, + {"mdccsr_el0", MDCCSR_EL0, 0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0, 0}, + {"mdrar_el1", MDRAR_EL1, 0}, + {"oslsr_el1", OSLSR_EL1, 0}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, 0}, + {"pmceid0_el0", PMCEID0_EL0, 0}, + {"pmceid1_el0", PMCEID1_EL0, 0}, + {"midr_el1", MIDR_EL1, 0}, + {"ccsidr_el1", CCSIDR_EL1, 0}, + {"clidr_el1", CLIDR_EL1, 0}, + 
{"ctr_el0", CTR_EL0, 0}, + {"mpidr_el1", MPIDR_EL1, 0}, + {"revidr_el1", REVIDR_EL1, 0}, + {"aidr_el1", AIDR_EL1, 0}, + {"dczid_el0", DCZID_EL0, 0}, + {"id_pfr0_el1", ID_PFR0_EL1, 0}, + {"id_pfr1_el1", ID_PFR1_EL1, 0}, + {"id_dfr0_el1", ID_DFR0_EL1, 0}, + {"id_afr0_el1", ID_AFR0_EL1, 0}, + {"id_mmfr0_el1", ID_MMFR0_EL1, 0}, + {"id_mmfr1_el1", ID_MMFR1_EL1, 0}, + {"id_mmfr2_el1", ID_MMFR2_EL1, 0}, + {"id_mmfr3_el1", ID_MMFR3_EL1, 0}, + {"id_isar0_el1", ID_ISAR0_EL1, 0}, + {"id_isar1_el1", ID_ISAR1_EL1, 0}, + {"id_isar2_el1", ID_ISAR2_EL1, 0}, + {"id_isar3_el1", ID_ISAR3_EL1, 0}, + {"id_isar4_el1", ID_ISAR4_EL1, 0}, + {"id_isar5_el1", ID_ISAR5_EL1, 0}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1, 0}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1, 0}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1, 0}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1, 0}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1, 0}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1, 0}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1, 0}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1, 0}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, 0}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, 0}, + {"mvfr0_el1", MVFR0_EL1, 0}, + {"mvfr1_el1", MVFR1_EL1, 0}, + {"mvfr2_el1", MVFR2_EL1, 0}, + {"rvbar_el1", RVBAR_EL1, 0}, + {"rvbar_el2", RVBAR_EL2, 0}, + {"rvbar_el3", RVBAR_EL3, 0}, + {"isr_el1", ISR_EL1, 0}, + {"cntpct_el0", CNTPCT_EL0, 0}, + {"cntvct_el0", CNTVCT_EL0, 0}, // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - {"trcauthstatus", TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", TRCPIDR6}, - {"trcpidr7", TRCPIDR7}, - {"trcpidr0", TRCPIDR0}, - {"trcpidr1", TRCPIDR1}, - {"trcpidr2", TRCPIDR2}, - {"trcpidr3", TRCPIDR3}, - {"trccidr0", TRCCIDR0}, - {"trccidr1", TRCCIDR1}, - {"trccidr2", TRCCIDR2}, - {"trccidr3", TRCCIDR3}, + {"trcstatr", TRCSTATR, 0}, + {"trcidr8", TRCIDR8, 0}, + {"trcidr9", TRCIDR9, 0}, + {"trcidr10", TRCIDR10, 0}, + {"trcidr11", TRCIDR11, 0}, + {"trcidr12", TRCIDR12, 0}, + {"trcidr13", TRCIDR13, 0}, + {"trcidr0", TRCIDR0, 0}, + {"trcidr1", TRCIDR1, 0}, + {"trcidr2", TRCIDR2, 0}, + {"trcidr3", TRCIDR3, 0}, + {"trcidr4", TRCIDR4, 0}, + {"trcidr5", TRCIDR5, 0}, + {"trcidr6", TRCIDR6, 0}, + {"trcidr7", TRCIDR7, 0}, + {"trcoslsr", TRCOSLSR, 0}, + {"trcpdsr", TRCPDSR, 0}, + {"trcdevaff0", TRCDEVAFF0, 0}, + {"trcdevaff1", TRCDEVAFF1, 0}, + {"trclsr", TRCLSR, 0}, + {"trcauthstatus", TRCAUTHSTATUS, 0}, + {"trcdevarch", TRCDEVARCH, 0}, + {"trcdevid", TRCDEVID, 0}, + {"trcdevtype", TRCDEVTYPE, 0}, + {"trcpidr4", TRCPIDR4, 0}, + {"trcpidr5", TRCPIDR5, 0}, + {"trcpidr6", TRCPIDR6, 0}, + {"trcpidr7", TRCPIDR7, 0}, + {"trcpidr0", TRCPIDR0, 0}, + {"trcpidr1", TRCPIDR1, 0}, + {"trcpidr2", TRCPIDR2, 0}, + {"trcpidr3", TRCPIDR3, 0}, + {"trccidr0", TRCCIDR0, 0}, + {"trccidr1", TRCCIDR1, 0}, + {"trccidr2", TRCCIDR2, 0}, + {"trccidr3", TRCCIDR3, 0}, // GICv3 registers - {"icc_iar1_el1", ICC_IAR1_EL1}, - {"icc_iar0_el1", ICC_IAR0_EL1}, - {"icc_hppir1_el1", ICC_HPPIR1_EL1}, - {"icc_hppir0_el1", ICC_HPPIR0_EL1}, - {"icc_rpr_el1", 
ICC_RPR_EL1}, - {"ich_vtr_el2", ICH_VTR_EL2}, - {"ich_eisr_el2", ICH_EISR_EL2}, - {"ich_elsr_el2", ICH_ELSR_EL2} + {"icc_iar1_el1", ICC_IAR1_EL1, 0}, + {"icc_iar0_el1", ICC_IAR0_EL1, 0}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1, 0}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1, 0}, + {"icc_rpr_el1", ICC_RPR_EL1, 0}, + {"ich_vtr_el2", ICH_VTR_EL2, 0}, + {"ich_eisr_el2", ICH_EISR_EL2, 0}, + {"ich_elsr_el2", ICH_ELSR_EL2, 0} }; -AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MRSMapper::MRSMapper() { InstMappings = &MRSMappings[0]; NumInstMappings = llvm::array_lengthof(MRSMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0}, + {"dbgdtrtx_el0", DBGDTRTX_EL0, 0}, + {"oslar_el1", OSLAR_EL1, 0}, + {"pmswinc_el0", PMSWINC_EL0, 0}, // Trace registers - {"trcoslar", TRCOSLAR}, - {"trclar", TRCLAR}, + {"trcoslar", TRCOSLAR, 0}, + {"trclar", TRCLAR, 0}, // GICv3 registers - {"icc_eoir1_el1", ICC_EOIR1_EL1}, - {"icc_eoir0_el1", ICC_EOIR0_EL1}, - {"icc_dir_el1", ICC_DIR_EL1}, - {"icc_sgi1r_el1", ICC_SGI1R_EL1}, - {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} + {"icc_eoir1_el1", ICC_EOIR1_EL1, 0}, + {"icc_eoir0_el1", ICC_EOIR0_EL1, 0}, + {"icc_dir_el1", ICC_DIR_EL1, 0}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1, 0}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1, 0}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1, 0}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, }; -AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MSRMapper::MSRMapper() { InstMappings = &MSRMappings[0]; NumInstMappings = llvm::array_lengthof(MSRMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - 
{"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - {"dbgwcr6_el1", DBGWCR6_EL1}, - {"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - 
{"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, + {"osdtrrx_el1", OSDTRRX_EL1, 0}, + {"osdtrtx_el1", OSDTRTX_EL1, 0}, + {"teecr32_el1", TEECR32_EL1, 0}, + {"mdccint_el1", MDCCINT_EL1, 0}, + {"mdscr_el1", MDSCR_EL1, 0}, + {"dbgdtr_el0", DBGDTR_EL0, 0}, + {"oseccr_el1", OSECCR_EL1, 0}, + {"dbgvcr32_el2", DBGVCR32_EL2, 0}, + {"dbgbvr0_el1", DBGBVR0_EL1, 0}, + {"dbgbvr1_el1", DBGBVR1_EL1, 0}, + {"dbgbvr2_el1", DBGBVR2_EL1, 0}, + {"dbgbvr3_el1", DBGBVR3_EL1, 0}, + {"dbgbvr4_el1", DBGBVR4_EL1, 0}, + {"dbgbvr5_el1", DBGBVR5_EL1, 0}, + {"dbgbvr6_el1", DBGBVR6_EL1, 0}, + {"dbgbvr7_el1", DBGBVR7_EL1, 0}, + {"dbgbvr8_el1", DBGBVR8_EL1, 0}, + {"dbgbvr9_el1", 
DBGBVR9_EL1, 0}, + {"dbgbvr10_el1", DBGBVR10_EL1, 0}, + {"dbgbvr11_el1", DBGBVR11_EL1, 0}, + {"dbgbvr12_el1", DBGBVR12_EL1, 0}, + {"dbgbvr13_el1", DBGBVR13_EL1, 0}, + {"dbgbvr14_el1", DBGBVR14_EL1, 0}, + {"dbgbvr15_el1", DBGBVR15_EL1, 0}, + {"dbgbcr0_el1", DBGBCR0_EL1, 0}, + {"dbgbcr1_el1", DBGBCR1_EL1, 0}, + {"dbgbcr2_el1", DBGBCR2_EL1, 0}, + {"dbgbcr3_el1", DBGBCR3_EL1, 0}, + {"dbgbcr4_el1", DBGBCR4_EL1, 0}, + {"dbgbcr5_el1", DBGBCR5_EL1, 0}, + {"dbgbcr6_el1", DBGBCR6_EL1, 0}, + {"dbgbcr7_el1", DBGBCR7_EL1, 0}, + {"dbgbcr8_el1", DBGBCR8_EL1, 0}, + {"dbgbcr9_el1", DBGBCR9_EL1, 0}, + {"dbgbcr10_el1", DBGBCR10_EL1, 0}, + {"dbgbcr11_el1", DBGBCR11_EL1, 0}, + {"dbgbcr12_el1", DBGBCR12_EL1, 0}, + {"dbgbcr13_el1", DBGBCR13_EL1, 0}, + {"dbgbcr14_el1", DBGBCR14_EL1, 0}, + {"dbgbcr15_el1", DBGBCR15_EL1, 0}, + {"dbgwvr0_el1", DBGWVR0_EL1, 0}, + {"dbgwvr1_el1", DBGWVR1_EL1, 0}, + {"dbgwvr2_el1", DBGWVR2_EL1, 0}, + {"dbgwvr3_el1", DBGWVR3_EL1, 0}, + {"dbgwvr4_el1", DBGWVR4_EL1, 0}, + {"dbgwvr5_el1", DBGWVR5_EL1, 0}, + {"dbgwvr6_el1", DBGWVR6_EL1, 0}, + {"dbgwvr7_el1", DBGWVR7_EL1, 0}, + {"dbgwvr8_el1", DBGWVR8_EL1, 0}, + {"dbgwvr9_el1", DBGWVR9_EL1, 0}, + {"dbgwvr10_el1", DBGWVR10_EL1, 0}, + {"dbgwvr11_el1", DBGWVR11_EL1, 0}, + {"dbgwvr12_el1", DBGWVR12_EL1, 0}, + {"dbgwvr13_el1", DBGWVR13_EL1, 0}, + {"dbgwvr14_el1", DBGWVR14_EL1, 0}, + {"dbgwvr15_el1", DBGWVR15_EL1, 0}, + {"dbgwcr0_el1", DBGWCR0_EL1, 0}, + {"dbgwcr1_el1", DBGWCR1_EL1, 0}, + {"dbgwcr2_el1", DBGWCR2_EL1, 0}, + {"dbgwcr3_el1", DBGWCR3_EL1, 0}, + {"dbgwcr4_el1", DBGWCR4_EL1, 0}, + {"dbgwcr5_el1", DBGWCR5_EL1, 0}, + {"dbgwcr6_el1", DBGWCR6_EL1, 0}, + {"dbgwcr7_el1", DBGWCR7_EL1, 0}, + {"dbgwcr8_el1", DBGWCR8_EL1, 0}, + {"dbgwcr9_el1", DBGWCR9_EL1, 0}, + {"dbgwcr10_el1", DBGWCR10_EL1, 0}, + {"dbgwcr11_el1", DBGWCR11_EL1, 0}, + {"dbgwcr12_el1", DBGWCR12_EL1, 0}, + {"dbgwcr13_el1", DBGWCR13_EL1, 0}, + {"dbgwcr14_el1", DBGWCR14_EL1, 0}, + {"dbgwcr15_el1", DBGWCR15_EL1, 0}, + {"teehbr32_el1", TEEHBR32_EL1, 0}, + {"osdlr_el1", OSDLR_EL1, 0}, + {"dbgprcr_el1", DBGPRCR_EL1, 0}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1, 0}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, 0}, + {"csselr_el1", CSSELR_EL1, 0}, + {"vpidr_el2", VPIDR_EL2, 0}, + {"vmpidr_el2", VMPIDR_EL2, 0}, + {"sctlr_el1", SCTLR_EL1, 0}, + {"sctlr_el2", SCTLR_EL2, 0}, + {"sctlr_el3", SCTLR_EL3, 0}, + {"actlr_el1", ACTLR_EL1, 0}, + {"actlr_el2", ACTLR_EL2, 0}, + {"actlr_el3", ACTLR_EL3, 0}, + {"cpacr_el1", CPACR_EL1, 0}, + {"hcr_el2", HCR_EL2, 0}, + {"scr_el3", SCR_EL3, 0}, + {"mdcr_el2", MDCR_EL2, 0}, + {"sder32_el3", SDER32_EL3, 0}, + {"cptr_el2", CPTR_EL2, 0}, + {"cptr_el3", CPTR_EL3, 0}, + {"hstr_el2", HSTR_EL2, 0}, + {"hacr_el2", HACR_EL2, 0}, + {"mdcr_el3", MDCR_EL3, 0}, + {"ttbr0_el1", TTBR0_EL1, 0}, + {"ttbr0_el2", TTBR0_EL2, 0}, + {"ttbr0_el3", TTBR0_EL3, 0}, + {"ttbr1_el1", TTBR1_EL1, 0}, + {"tcr_el1", TCR_EL1, 0}, + {"tcr_el2", TCR_EL2, 0}, + {"tcr_el3", TCR_EL3, 0}, + {"vttbr_el2", VTTBR_EL2, 0}, + {"vtcr_el2", VTCR_EL2, 0}, + {"dacr32_el2", DACR32_EL2, 0}, + {"spsr_el1", SPSR_EL1, 0}, + {"spsr_el2", SPSR_EL2, 0}, + {"spsr_el3", SPSR_EL3, 0}, + {"elr_el1", ELR_EL1, 0}, + {"elr_el2", ELR_EL2, 0}, + {"elr_el3", ELR_EL3, 0}, + {"sp_el0", SP_EL0, 0}, + {"sp_el1", SP_EL1, 0}, + {"sp_el2", SP_EL2, 0}, + {"spsel", SPSel, 0}, + {"nzcv", NZCV, 0}, + {"daif", DAIF, 0}, + {"currentel", CurrentEL, 0}, + {"spsr_irq", SPSR_irq, 0}, + {"spsr_abt", SPSR_abt, 0}, + {"spsr_und", SPSR_und, 0}, + {"spsr_fiq", SPSR_fiq, 0}, + {"fpcr", FPCR, 0}, + {"fpsr", FPSR, 0}, + {"dspsr_el0", DSPSR_EL0, 0}, + 
{"dlr_el0", DLR_EL0, 0}, + {"ifsr32_el2", IFSR32_EL2, 0}, + {"afsr0_el1", AFSR0_EL1, 0}, + {"afsr0_el2", AFSR0_EL2, 0}, + {"afsr0_el3", AFSR0_EL3, 0}, + {"afsr1_el1", AFSR1_EL1, 0}, + {"afsr1_el2", AFSR1_EL2, 0}, + {"afsr1_el3", AFSR1_EL3, 0}, + {"esr_el1", ESR_EL1, 0}, + {"esr_el2", ESR_EL2, 0}, + {"esr_el3", ESR_EL3, 0}, + {"fpexc32_el2", FPEXC32_EL2, 0}, + {"far_el1", FAR_EL1, 0}, + {"far_el2", FAR_EL2, 0}, + {"far_el3", FAR_EL3, 0}, + {"hpfar_el2", HPFAR_EL2, 0}, + {"par_el1", PAR_EL1, 0}, + {"pmcr_el0", PMCR_EL0, 0}, + {"pmcntenset_el0", PMCNTENSET_EL0, 0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0, 0}, + {"pmovsclr_el0", PMOVSCLR_EL0, 0}, + {"pmselr_el0", PMSELR_EL0, 0}, + {"pmccntr_el0", PMCCNTR_EL0, 0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0, 0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0, 0}, + {"pmuserenr_el0", PMUSERENR_EL0, 0}, + {"pmintenset_el1", PMINTENSET_EL1, 0}, + {"pmintenclr_el1", PMINTENCLR_EL1, 0}, + {"pmovsset_el0", PMOVSSET_EL0, 0}, + {"mair_el1", MAIR_EL1, 0}, + {"mair_el2", MAIR_EL2, 0}, + {"mair_el3", MAIR_EL3, 0}, + {"amair_el1", AMAIR_EL1, 0}, + {"amair_el2", AMAIR_EL2, 0}, + {"amair_el3", AMAIR_EL3, 0}, + {"vbar_el1", VBAR_EL1, 0}, + {"vbar_el2", VBAR_EL2, 0}, + {"vbar_el3", VBAR_EL3, 0}, + {"rmr_el1", RMR_EL1, 0}, + {"rmr_el2", RMR_EL2, 0}, + {"rmr_el3", RMR_EL3, 0}, + {"contextidr_el1", CONTEXTIDR_EL1, 0}, + {"tpidr_el0", TPIDR_EL0, 0}, + {"tpidr_el2", TPIDR_EL2, 0}, + {"tpidr_el3", TPIDR_EL3, 0}, + {"tpidrro_el0", TPIDRRO_EL0, 0}, + {"tpidr_el1", TPIDR_EL1, 0}, + {"cntfrq_el0", CNTFRQ_EL0, 0}, + {"cntvoff_el2", CNTVOFF_EL2, 0}, + {"cntkctl_el1", CNTKCTL_EL1, 0}, + {"cnthctl_el2", CNTHCTL_EL2, 0}, + {"cntp_tval_el0", CNTP_TVAL_EL0, 0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2, 0}, + {"cntps_tval_el1", CNTPS_TVAL_EL1, 0}, + {"cntp_ctl_el0", CNTP_CTL_EL0, 0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2, 0}, + {"cntps_ctl_el1", CNTPS_CTL_EL1, 0}, + {"cntp_cval_el0", CNTP_CVAL_EL0, 0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2, 0}, + {"cntps_cval_el1", CNTPS_CVAL_EL1, 0}, + {"cntv_tval_el0", CNTV_TVAL_EL0, 0}, + {"cntv_ctl_el0", CNTV_CTL_EL0, 0}, + {"cntv_cval_el0", CNTV_CVAL_EL0, 0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0, 0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0, 0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0, 0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0, 0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0, 0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0, 0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0, 0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0, 0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0, 0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0, 0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0, 0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0, 0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0, 0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0, 0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0, 0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0, 0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0, 0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0, 0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0, 0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0, 0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0, 0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0, 0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0, 0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0, 0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0, 0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0, 0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0, 0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0, 0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0, 0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0, 0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0, 0}, + {"pmccfiltr_el0", PMCCFILTR_EL0, 0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0, 0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0, 0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0, 0}, + {"pmevtyper3_el0", 
PMEVTYPER3_EL0, 0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0, 0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0, 0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0, 0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0, 0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0, 0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0, 0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0, 0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0, 0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0, 0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0, 0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0, 0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0, 0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0, 0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0, 0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0, 0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0, 0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0, 0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0, 0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0, 0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0, 0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0, 0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0, 0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0, 0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0, 0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0, 0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0, 0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0, 0}, // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - {"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, - {"trccntctlr1", TRCCNTCTLR1}, - {"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", 
TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - {"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - {"trccidcvr3", TRCCIDCVR3}, - {"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, + {"trcprgctlr", TRCPRGCTLR, 0}, + {"trcprocselr", TRCPROCSELR, 0}, + {"trcconfigr", TRCCONFIGR, 0}, + {"trcauxctlr", TRCAUXCTLR, 0}, + {"trceventctl0r", TRCEVENTCTL0R, 0}, + {"trceventctl1r", TRCEVENTCTL1R, 0}, + {"trcstallctlr", TRCSTALLCTLR, 0}, + {"trctsctlr", TRCTSCTLR, 0}, + {"trcsyncpr", TRCSYNCPR, 0}, + {"trcccctlr", TRCCCCTLR, 0}, + {"trcbbctlr", TRCBBCTLR, 0}, + {"trctraceidr", TRCTRACEIDR, 0}, + {"trcqctlr", TRCQCTLR, 0}, + {"trcvictlr", TRCVICTLR, 0}, + {"trcviiectlr", TRCVIIECTLR, 0}, + {"trcvissctlr", TRCVISSCTLR, 0}, + {"trcvipcssctlr", TRCVIPCSSCTLR, 0}, + {"trcvdctlr", TRCVDCTLR, 0}, + {"trcvdsacctlr", TRCVDSACCTLR, 0}, + {"trcvdarcctlr", TRCVDARCCTLR, 0}, + {"trcseqevr0", TRCSEQEVR0, 0}, + {"trcseqevr1", TRCSEQEVR1, 0}, 
+ {"trcseqevr2", TRCSEQEVR2, 0}, + {"trcseqrstevr", TRCSEQRSTEVR, 0}, + {"trcseqstr", TRCSEQSTR, 0}, + {"trcextinselr", TRCEXTINSELR, 0}, + {"trccntrldvr0", TRCCNTRLDVR0, 0}, + {"trccntrldvr1", TRCCNTRLDVR1, 0}, + {"trccntrldvr2", TRCCNTRLDVR2, 0}, + {"trccntrldvr3", TRCCNTRLDVR3, 0}, + {"trccntctlr0", TRCCNTCTLR0, 0}, + {"trccntctlr1", TRCCNTCTLR1, 0}, + {"trccntctlr2", TRCCNTCTLR2, 0}, + {"trccntctlr3", TRCCNTCTLR3, 0}, + {"trccntvr0", TRCCNTVR0, 0}, + {"trccntvr1", TRCCNTVR1, 0}, + {"trccntvr2", TRCCNTVR2, 0}, + {"trccntvr3", TRCCNTVR3, 0}, + {"trcimspec0", TRCIMSPEC0, 0}, + {"trcimspec1", TRCIMSPEC1, 0}, + {"trcimspec2", TRCIMSPEC2, 0}, + {"trcimspec3", TRCIMSPEC3, 0}, + {"trcimspec4", TRCIMSPEC4, 0}, + {"trcimspec5", TRCIMSPEC5, 0}, + {"trcimspec6", TRCIMSPEC6, 0}, + {"trcimspec7", TRCIMSPEC7, 0}, + {"trcrsctlr2", TRCRSCTLR2, 0}, + {"trcrsctlr3", TRCRSCTLR3, 0}, + {"trcrsctlr4", TRCRSCTLR4, 0}, + {"trcrsctlr5", TRCRSCTLR5, 0}, + {"trcrsctlr6", TRCRSCTLR6, 0}, + {"trcrsctlr7", TRCRSCTLR7, 0}, + {"trcrsctlr8", TRCRSCTLR8, 0}, + {"trcrsctlr9", TRCRSCTLR9, 0}, + {"trcrsctlr10", TRCRSCTLR10, 0}, + {"trcrsctlr11", TRCRSCTLR11, 0}, + {"trcrsctlr12", TRCRSCTLR12, 0}, + {"trcrsctlr13", TRCRSCTLR13, 0}, + {"trcrsctlr14", TRCRSCTLR14, 0}, + {"trcrsctlr15", TRCRSCTLR15, 0}, + {"trcrsctlr16", TRCRSCTLR16, 0}, + {"trcrsctlr17", TRCRSCTLR17, 0}, + {"trcrsctlr18", TRCRSCTLR18, 0}, + {"trcrsctlr19", TRCRSCTLR19, 0}, + {"trcrsctlr20", TRCRSCTLR20, 0}, + {"trcrsctlr21", TRCRSCTLR21, 0}, + {"trcrsctlr22", TRCRSCTLR22, 0}, + {"trcrsctlr23", TRCRSCTLR23, 0}, + {"trcrsctlr24", TRCRSCTLR24, 0}, + {"trcrsctlr25", TRCRSCTLR25, 0}, + {"trcrsctlr26", TRCRSCTLR26, 0}, + {"trcrsctlr27", TRCRSCTLR27, 0}, + {"trcrsctlr28", TRCRSCTLR28, 0}, + {"trcrsctlr29", TRCRSCTLR29, 0}, + {"trcrsctlr30", TRCRSCTLR30, 0}, + {"trcrsctlr31", TRCRSCTLR31, 0}, + {"trcssccr0", TRCSSCCR0, 0}, + {"trcssccr1", TRCSSCCR1, 0}, + {"trcssccr2", TRCSSCCR2, 0}, + {"trcssccr3", TRCSSCCR3, 0}, + {"trcssccr4", TRCSSCCR4, 0}, + {"trcssccr5", TRCSSCCR5, 0}, + {"trcssccr6", TRCSSCCR6, 0}, + {"trcssccr7", TRCSSCCR7, 0}, + {"trcsscsr0", TRCSSCSR0, 0}, + {"trcsscsr1", TRCSSCSR1, 0}, + {"trcsscsr2", TRCSSCSR2, 0}, + {"trcsscsr3", TRCSSCSR3, 0}, + {"trcsscsr4", TRCSSCSR4, 0}, + {"trcsscsr5", TRCSSCSR5, 0}, + {"trcsscsr6", TRCSSCSR6, 0}, + {"trcsscsr7", TRCSSCSR7, 0}, + {"trcsspcicr0", TRCSSPCICR0, 0}, + {"trcsspcicr1", TRCSSPCICR1, 0}, + {"trcsspcicr2", TRCSSPCICR2, 0}, + {"trcsspcicr3", TRCSSPCICR3, 0}, + {"trcsspcicr4", TRCSSPCICR4, 0}, + {"trcsspcicr5", TRCSSPCICR5, 0}, + {"trcsspcicr6", TRCSSPCICR6, 0}, + {"trcsspcicr7", TRCSSPCICR7, 0}, + {"trcpdcr", TRCPDCR, 0}, + {"trcacvr0", TRCACVR0, 0}, + {"trcacvr1", TRCACVR1, 0}, + {"trcacvr2", TRCACVR2, 0}, + {"trcacvr3", TRCACVR3, 0}, + {"trcacvr4", TRCACVR4, 0}, + {"trcacvr5", TRCACVR5, 0}, + {"trcacvr6", TRCACVR6, 0}, + {"trcacvr7", TRCACVR7, 0}, + {"trcacvr8", TRCACVR8, 0}, + {"trcacvr9", TRCACVR9, 0}, + {"trcacvr10", TRCACVR10, 0}, + {"trcacvr11", TRCACVR11, 0}, + {"trcacvr12", TRCACVR12, 0}, + {"trcacvr13", TRCACVR13, 0}, + {"trcacvr14", TRCACVR14, 0}, + {"trcacvr15", TRCACVR15, 0}, + {"trcacatr0", TRCACATR0, 0}, + {"trcacatr1", TRCACATR1, 0}, + {"trcacatr2", TRCACATR2, 0}, + {"trcacatr3", TRCACATR3, 0}, + {"trcacatr4", TRCACATR4, 0}, + {"trcacatr5", TRCACATR5, 0}, + {"trcacatr6", TRCACATR6, 0}, + {"trcacatr7", TRCACATR7, 0}, + {"trcacatr8", TRCACATR8, 0}, + {"trcacatr9", TRCACATR9, 0}, + {"trcacatr10", TRCACATR10, 0}, + {"trcacatr11", TRCACATR11, 0}, + {"trcacatr12", TRCACATR12, 0}, + {"trcacatr13", 
TRCACATR13, 0}, + {"trcacatr14", TRCACATR14, 0}, + {"trcacatr15", TRCACATR15, 0}, + {"trcdvcvr0", TRCDVCVR0, 0}, + {"trcdvcvr1", TRCDVCVR1, 0}, + {"trcdvcvr2", TRCDVCVR2, 0}, + {"trcdvcvr3", TRCDVCVR3, 0}, + {"trcdvcvr4", TRCDVCVR4, 0}, + {"trcdvcvr5", TRCDVCVR5, 0}, + {"trcdvcvr6", TRCDVCVR6, 0}, + {"trcdvcvr7", TRCDVCVR7, 0}, + {"trcdvcmr0", TRCDVCMR0, 0}, + {"trcdvcmr1", TRCDVCMR1, 0}, + {"trcdvcmr2", TRCDVCMR2, 0}, + {"trcdvcmr3", TRCDVCMR3, 0}, + {"trcdvcmr4", TRCDVCMR4, 0}, + {"trcdvcmr5", TRCDVCMR5, 0}, + {"trcdvcmr6", TRCDVCMR6, 0}, + {"trcdvcmr7", TRCDVCMR7, 0}, + {"trccidcvr0", TRCCIDCVR0, 0}, + {"trccidcvr1", TRCCIDCVR1, 0}, + {"trccidcvr2", TRCCIDCVR2, 0}, + {"trccidcvr3", TRCCIDCVR3, 0}, + {"trccidcvr4", TRCCIDCVR4, 0}, + {"trccidcvr5", TRCCIDCVR5, 0}, + {"trccidcvr6", TRCCIDCVR6, 0}, + {"trccidcvr7", TRCCIDCVR7, 0}, + {"trcvmidcvr0", TRCVMIDCVR0, 0}, + {"trcvmidcvr1", TRCVMIDCVR1, 0}, + {"trcvmidcvr2", TRCVMIDCVR2, 0}, + {"trcvmidcvr3", TRCVMIDCVR3, 0}, + {"trcvmidcvr4", TRCVMIDCVR4, 0}, + {"trcvmidcvr5", TRCVMIDCVR5, 0}, + {"trcvmidcvr6", TRCVMIDCVR6, 0}, + {"trcvmidcvr7", TRCVMIDCVR7, 0}, + {"trccidcctlr0", TRCCIDCCTLR0, 0}, + {"trccidcctlr1", TRCCIDCCTLR1, 0}, + {"trcvmidcctlr0", TRCVMIDCCTLR0, 0}, + {"trcvmidcctlr1", TRCVMIDCCTLR1, 0}, + {"trcitctrl", TRCITCTRL, 0}, + {"trcclaimset", TRCCLAIMSET, 0}, + {"trcclaimclr", TRCCLAIMCLR, 0}, // GICv3 registers - {"icc_bpr1_el1", ICC_BPR1_EL1}, - {"icc_bpr0_el1", ICC_BPR0_EL1}, - {"icc_pmr_el1", ICC_PMR_EL1}, - {"icc_ctlr_el1", ICC_CTLR_EL1}, - {"icc_ctlr_el3", ICC_CTLR_EL3}, - {"icc_sre_el1", ICC_SRE_EL1}, - {"icc_sre_el2", ICC_SRE_EL2}, - {"icc_sre_el3", ICC_SRE_EL3}, - {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, - {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, - {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, - {"icc_seien_el1", ICC_SEIEN_EL1}, - {"icc_ap0r0_el1", ICC_AP0R0_EL1}, - {"icc_ap0r1_el1", ICC_AP0R1_EL1}, - {"icc_ap0r2_el1", ICC_AP0R2_EL1}, - {"icc_ap0r3_el1", ICC_AP0R3_EL1}, - {"icc_ap1r0_el1", ICC_AP1R0_EL1}, - {"icc_ap1r1_el1", ICC_AP1R1_EL1}, - {"icc_ap1r2_el1", ICC_AP1R2_EL1}, - {"icc_ap1r3_el1", ICC_AP1R3_EL1}, - {"ich_ap0r0_el2", ICH_AP0R0_EL2}, - {"ich_ap0r1_el2", ICH_AP0R1_EL2}, - {"ich_ap0r2_el2", ICH_AP0R2_EL2}, - {"ich_ap0r3_el2", ICH_AP0R3_EL2}, - {"ich_ap1r0_el2", ICH_AP1R0_EL2}, - {"ich_ap1r1_el2", ICH_AP1R1_EL2}, - {"ich_ap1r2_el2", ICH_AP1R2_EL2}, - {"ich_ap1r3_el2", ICH_AP1R3_EL2}, - {"ich_hcr_el2", ICH_HCR_EL2}, - {"ich_misr_el2", ICH_MISR_EL2}, - {"ich_vmcr_el2", ICH_VMCR_EL2}, - {"ich_vseir_el2", ICH_VSEIR_EL2}, - {"ich_lr0_el2", ICH_LR0_EL2}, - {"ich_lr1_el2", ICH_LR1_EL2}, - {"ich_lr2_el2", ICH_LR2_EL2}, - {"ich_lr3_el2", ICH_LR3_EL2}, - {"ich_lr4_el2", ICH_LR4_EL2}, - {"ich_lr5_el2", ICH_LR5_EL2}, - {"ich_lr6_el2", ICH_LR6_EL2}, - {"ich_lr7_el2", ICH_LR7_EL2}, - {"ich_lr8_el2", ICH_LR8_EL2}, - {"ich_lr9_el2", ICH_LR9_EL2}, - {"ich_lr10_el2", ICH_LR10_EL2}, - {"ich_lr11_el2", ICH_LR11_EL2}, - {"ich_lr12_el2", ICH_LR12_EL2}, - {"ich_lr13_el2", ICH_LR13_EL2}, - {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = { - {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} + {"icc_bpr1_el1", ICC_BPR1_EL1, 0}, + {"icc_bpr0_el1", ICC_BPR0_EL1, 0}, + {"icc_pmr_el1", ICC_PMR_EL1, 0}, + {"icc_ctlr_el1", ICC_CTLR_EL1, 0}, + {"icc_ctlr_el3", ICC_CTLR_EL3, 0}, + {"icc_sre_el1", ICC_SRE_EL1, 0}, + {"icc_sre_el2", ICC_SRE_EL2, 0}, + {"icc_sre_el3", ICC_SRE_EL3, 0}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, 0}, + {"icc_igrpen1_el1", 
ICC_IGRPEN1_EL1, 0}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, 0}, + {"icc_seien_el1", ICC_SEIEN_EL1, 0}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1, 0}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1, 0}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1, 0}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1, 0}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1, 0}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1, 0}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1, 0}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1, 0}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2, 0}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2, 0}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2, 0}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2, 0}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2, 0}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2, 0}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2, 0}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2, 0}, + {"ich_hcr_el2", ICH_HCR_EL2, 0}, + {"ich_misr_el2", ICH_MISR_EL2, 0}, + {"ich_vmcr_el2", ICH_VMCR_EL2, 0}, + {"ich_vseir_el2", ICH_VSEIR_EL2, 0}, + {"ich_lr0_el2", ICH_LR0_EL2, 0}, + {"ich_lr1_el2", ICH_LR1_EL2, 0}, + {"ich_lr2_el2", ICH_LR2_EL2, 0}, + {"ich_lr3_el2", ICH_LR3_EL2, 0}, + {"ich_lr4_el2", ICH_LR4_EL2, 0}, + {"ich_lr5_el2", ICH_LR5_EL2, 0}, + {"ich_lr6_el2", ICH_LR6_EL2, 0}, + {"ich_lr7_el2", ICH_LR7_EL2, 0}, + {"ich_lr8_el2", ICH_LR8_EL2, 0}, + {"ich_lr9_el2", ICH_LR9_EL2, 0}, + {"ich_lr10_el2", ICH_LR10_EL2, 0}, + {"ich_lr11_el2", ICH_LR11_EL2, 0}, + {"ich_lr12_el2", ICH_LR12_EL2, 0}, + {"ich_lr13_el2", ICH_LR13_EL2, 0}, + {"ich_lr14_el2", ICH_LR14_EL2, 0}, + {"ich_lr15_el2", ICH_LR15_EL2, 0}, + + // Cyclone registers + {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, + + // v8.1a "Limited Ordering Regions" extension-specific system registers + {"lorsa_el1", LORSA_EL1, AArch64::HasV8_1aOps}, + {"lorea_el1", LOREA_EL1, AArch64::HasV8_1aOps}, + {"lorn_el1", LORN_EL1, AArch64::HasV8_1aOps}, + {"lorc_el1", LORC_EL1, AArch64::HasV8_1aOps}, + {"lorid_el1", LORID_EL1, AArch64::HasV8_1aOps}, + + // v8.1a "Virtualization host extensions" system registers + {"ttbr1_el2", TTBR1_EL2, AArch64::HasV8_1aOps}, + {"contextidr_el2", CONTEXTIDR_EL2, AArch64::HasV8_1aOps}, + {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::HasV8_1aOps}, + {"sctlr_el12", SCTLR_EL12, AArch64::HasV8_1aOps}, + {"cpacr_el12", CPACR_EL12, AArch64::HasV8_1aOps}, + {"ttbr0_el12", TTBR0_EL12, AArch64::HasV8_1aOps}, + {"ttbr1_el12", TTBR1_EL12, AArch64::HasV8_1aOps}, + {"tcr_el12", TCR_EL12, AArch64::HasV8_1aOps}, + {"afsr0_el12", AFSR0_EL12, AArch64::HasV8_1aOps}, + {"afsr1_el12", AFSR1_EL12, AArch64::HasV8_1aOps}, + {"esr_el12", ESR_EL12, AArch64::HasV8_1aOps}, + {"far_el12", FAR_EL12, AArch64::HasV8_1aOps}, + {"mair_el12", MAIR_EL12, AArch64::HasV8_1aOps}, + {"amair_el12", AMAIR_EL12, AArch64::HasV8_1aOps}, + {"vbar_el12", VBAR_EL12, AArch64::HasV8_1aOps}, + {"contextidr_el12", CONTEXTIDR_EL12, AArch64::HasV8_1aOps}, + {"cntkctl_el12", CNTKCTL_EL12, AArch64::HasV8_1aOps}, + {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntp_cval_el02", CNTP_CVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::HasV8_1aOps}, + {"spsr_el12", SPSR_EL12, AArch64::HasV8_1aOps}, + {"elr_el12", ELR_EL12, AArch64::HasV8_1aOps}, }; uint32_t 
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { +AArch64SysReg::SysRegMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Name == NameLower) { + if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return SysRegMappings[i].Value; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Name == NameLower) { - Valid = true; - return CycloneSysRegMappings[i].Value; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Name == NameLower) { + if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return InstMappings[i].Value; } @@ -814,27 +848,18 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, uint64_t FeatureBits) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Value == Bits) { + if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) { return SysRegMappings[i].Name; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Value == Bits) { - return CycloneSysRegMappings[i].Name; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). 
for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Value == Bits) { + if (InstMappings[i].isValueEqual(Bits, FeatureBits)) { return InstMappings[i].Name; } } @@ -851,38 +876,38 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { } const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} + {"ipas2e1is", IPAS2E1IS, 0}, + {"ipas2le1is", IPAS2LE1IS, 0}, + {"vmalle1is", VMALLE1IS, 0}, + {"alle2is", ALLE2IS, 0}, + {"alle3is", ALLE3IS, 0}, + {"vae1is", VAE1IS, 0}, + {"vae2is", VAE2IS, 0}, + {"vae3is", VAE3IS, 0}, + {"aside1is", ASIDE1IS, 0}, + {"vaae1is", VAAE1IS, 0}, + {"alle1is", ALLE1IS, 0}, + {"vale1is", VALE1IS, 0}, + {"vale2is", VALE2IS, 0}, + {"vale3is", VALE3IS, 0}, + {"vmalls12e1is", VMALLS12E1IS, 0}, + {"vaale1is", VAALE1IS, 0}, + {"ipas2e1", IPAS2E1, 0}, + {"ipas2le1", IPAS2LE1, 0}, + {"vmalle1", VMALLE1, 0}, + {"alle2", ALLE2, 0}, + {"alle3", ALLE3, 0}, + {"vae1", VAE1, 0}, + {"vae2", VAE2, 0}, + {"vae3", VAE3, 0}, + {"aside1", ASIDE1, 0}, + {"vaae1", VAAE1, 0}, + {"alle1", ALLE1, 0}, + {"vale1", VALE1, 0}, + {"vale2", VALE2, 0}, + {"vale3", VALE3, 0}, + {"vmalls12e1", VMALLS12E1, 0}, + {"vaale1", VAALE1, 0} }; AArch64TLBI::TLBIMapper::TLBIMapper() diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 2ae6f52..659ea90 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -280,14 +280,26 @@ struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; + uint64_t AvailableForFeatures; + // empty AvailableForFeatures means "always-on" + bool isNameEqual(std::string Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Name == Other; + } + bool isValueEqual(uint32_t Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Value == Other; + } }; template<int N> AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm) : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {} - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; + StringRef toString(uint32_t Value, uint64_t FeatureBits, bool &Valid) const; + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; /// Many of the instructions allow an alternative assembly form consisting of /// a simple immediate. 
Currently the only valid forms are ranges [0, N) where @@ -435,7 +447,10 @@ namespace AArch64PState { Invalid = -1, SPSel = 0x05, DAIFSet = 0x1e, - DAIFClr = 0x1f + DAIFClr = 0x1f, + + // v8.1a "Privileged Access Never" extension-specific PStates + PAN = 0x04, }; struct PStateMapper : AArch64NamedImmMapper { @@ -1122,11 +1137,48 @@ namespace AArch64SysReg { ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111 - }; - // Cyclone specific system registers - enum CycloneSysRegValues { - CPM_IOACC_CTL_EL3 = 0xff90 + // v8.1a "Privileged Access Never" extension-specific system registers + PAN = 0xc213, // 11 000 0100 0010 011 + + // v8.1a "Limited Ordering Regions" extension-specific system registers + LORSA_EL1 = 0xc520, // 11 000 1010 0100 000 + LOREA_EL1 = 0xc521, // 11 000 1010 0100 001 + LORN_EL1 = 0xc522, // 11 000 1010 0100 010 + LORC_EL1 = 0xc523, // 11 000 1010 0100 011 + LORID_EL1 = 0xc527, // 11 000 1010 0100 111 + + // v8.1a "Virtualization host extensions" system registers + TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001 + CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001 + CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000 + CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010 + CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001 + SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000 + CPACR_EL12 = 0xe882, // 11 101 0001 0000 010 + TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000 + TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001 + TCR_EL12 = 0xe902, // 11 101 0010 0000 010 + AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000 + AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001 + ESR_EL12 = 0xea90, // 11 101 0101 0010 000 + FAR_EL12 = 0xeb00, // 11 101 0110 0000 000 + MAIR_EL12 = 0xed10, // 11 101 1010 0010 000 + AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000 + VBAR_EL12 = 0xee00, // 11 101 1100 0000 000 + CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001 + CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000 + CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000 + CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001 + CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010 + CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000 + CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001 + CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010 + SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 + ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + + // Cyclone specific system registers + CPM_IOACC_CTL_EL3 = 0xff90, }; // Note that these do not inherit from AArch64NamedImmMapper. This class is @@ -1135,25 +1187,23 @@ namespace AArch64SysReg { // this one case. 
struct SysRegMapper { static const AArch64NamedImmMapper::Mapping SysRegMappings[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[]; const AArch64NamedImmMapper::Mapping *InstMappings; size_t NumInstMappings; - uint64_t FeatureBits; - SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits) const; + SysRegMapper() { } + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; + std::string toString(uint32_t Bits, uint64_t FeatureBits) const; }; struct MSRMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MSRMappings[]; - MSRMapper(uint64_t FeatureBits); + MSRMapper(); }; struct MRSMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MRSMappings[]; - MRSMapper(uint64_t FeatureBits); + MRSMapper(); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ce0aed9..bd1c7af 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -180,7 +180,7 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops, FeatureAClass, FeatureCRC]>; @@ -260,6 +260,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC]>; +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", + [FeatureHWDiv, + FeatureAvoidPartialCPSR, + FeatureDSPThumb2, FeatureT2XtPk, + HasV7Ops, FeatureDB, FeatureHasRAS, + FeatureRClass]>; + def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, @@ -396,6 +404,16 @@ def : ProcessorModel<"krait", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: R4 has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4", CortexA8Model, + [ProcR4]>; + +// FIXME: R4F has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4f", CortexA8Model, + [ProcR4, + FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>; + // FIXME: R5 has currently the same ProcessorModel as A8. 
def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -457,14 +475,6 @@ def : ProcessorModel<"cyclone", SwiftModel, FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; -// V8.1 Processors -def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2, - FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto]>; - //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -485,7 +495,15 @@ def ARMInstrInfo : InstrInfo; // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; + let AssemblyWriters = [ARMAsmWriter]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 102def1..1a2acf5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -207,7 +207,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } @@ -216,7 +216,7 @@ MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -520,28 +520,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (auto &stub: Stubs) { - OutStreamer.EmitLabel(stub.first); - OutStreamer.EmitSymbolValue(stub.second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } } //===----------------------------------------------------------------------===// @@ -597,7 +575,7 @@ void ARMAsmPrinter::emitAttributes() { std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -661,8 +639,8 @@ void ARMAsmPrinter::emitAttributes() { // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1aOps() ? 
ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a8c7657..3f79a9b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -245,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, // This register should preferably be even (Odd == 0) or odd (Odd == 1). // Check if the other part of the pair has already been assigned, and provide // the paired register as the first hint. + unsigned Paired = Hint.second; + if (Paired == 0) + return; + unsigned PairedPhys = 0; - if (VRM && VRM->hasPhys(Hint.second)) { - PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); - if (PairedPhys && MRI.isReserved(PairedPhys)) - PairedPhys = 0; + if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + PairedPhys = Paired; + } else if (VRM && VRM->hasPhys(Paired)) { + PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); } // First prefer the paired physreg. @@ -284,9 +288,14 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, // change. unsigned OtherReg = Hint.second; Hint = MRI->getRegAllocationHint(OtherReg); - if (Hint.second == Reg) - // Make sure the pair has not already divorced. + // Make sure the pair has not already divorced. + if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + MRI->setRegAllocationHint(NewReg, + Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven + : ARMRI::RegPairOdd, OtherReg); + } } } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 13bef54..36f63e2 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -86,7 +86,7 @@ protected: } public: - virtual ~ARMConstantPoolValue(); + ~ARMConstantPoolValue() override; ARMCP::ARMCPModifier getModifier() const { return Modifier; } const char *getModifierText() const; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 830953b..9d2b09b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -311,6 +311,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; StackAdjustingInsts DefCFAOffsetCandidates; + bool HasFP = hasFP(MF); // Allocate the vararg register save area. if (ArgRegsSaveSize) { @@ -327,6 +328,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes - ArgRegsSaveSize, true); } + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); return; } @@ -375,7 +377,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Determine starting offsets of spill areas. - bool HasFP = hasFP(MF); unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; unsigned DPRAlign = DPRCSSize ? 
std::min(8U, Align) : 4U; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3b1b8dd..72afd2c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // Try to generate VSEL on ARMv8. + // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can select VMAXNM/VMINNM from a compare followed by a select with the + // We can use VMAXNM/VMINNM for a compare followed by a select with the // same operands, as follows: - // c = fcmp [ogt, olt, ugt, ult] a, b + // c = fcmp [?gt, ?ge, ?lt, ?le] a, b // select c, a, b - // We only do this in unsafe-fp-math, because signed zeros and NaNs are - // handled differently than the original code sequence. + // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. + // We only do this transformation in UnsafeFPMath and for no-NaNs + // comparisons, because signed zeros and NaNs are handled differently than + // the original code sequence. + // FIXME: There are more cases that can be transformed even with NaNs, + // signed zeroes and safe math. E.g. in the following, the result will be + // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too. + // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0. + // FIXME: There is similar code that allows some extensions in + // AArch64TargetLowering::LowerSELECT_CC that should be shared with this + // code. if (getTargetMachine().Options.UnsafeFPMath) { if (LHS == TrueVal && RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } else if (LHS == FalseVal && RHS == TrueVal) { - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c3984ca..52f3555 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -208,6 +208,8 @@ def HasV8 : Predicate<"Subtarget->hasV8Ops()">, AssemblerPredicate<"HasV8Ops", "armv8">; def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -226,8 +228,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -388,6 
+388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Immediate operands with a shared generic asm render method. class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } +// Operands that are part of a memory addressing mode. +class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand<OtherVT> { @@ -790,7 +793,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -class AddrMode_Imm12 : Operand<i32>, +class AddrMode_Imm12 : MemOperand, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other @@ -813,7 +816,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 { // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } -def ldst_so_reg : Operand<i32>, +def ldst_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectLdStSOReg", []> { let EncoderMethod = "getLdStSORegOpValue"; // FIXME: Simplify the printer @@ -829,7 +832,7 @@ def ldst_so_reg : Operand<i32>, // {8} 1 if imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value. def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } -def postidx_imm8 : Operand<i32> { +def postidx_imm8 : MemOperand { let PrintMethod = "printPostIdxImm8Operand"; let ParserMatchClass = PostIdxImm8AsmOperand; let MIOperandInfo = (ops i32imm); @@ -841,7 +844,7 @@ def postidx_imm8 : Operand<i32> { // {8} 1 if imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value, scaled by 4. def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } -def postidx_imm8s4 : Operand<i32> { +def postidx_imm8s4 : MemOperand { let PrintMethod = "printPostIdxImm8s4Operand"; let ParserMatchClass = PostIdxImm8s4AsmOperand; let MIOperandInfo = (ops i32imm); @@ -854,7 +857,7 @@ def PostIdxRegAsmOperand : AsmOperandClass { let Name = "PostIdxReg"; let ParserMethod = "parsePostIdxReg"; } -def postidx_reg : Operand<i32> { +def postidx_reg : MemOperand { let EncoderMethod = "getPostIdxRegOpValue"; let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; @@ -869,7 +872,7 @@ def postidx_reg : Operand<i32> { // FIXME: addrmode2 should be refactored the rest of the way to always // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } -def addrmode2 : Operand<i32>, +def addrmode2 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; @@ -881,7 +884,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass { let Name = "PostIdxRegShifted"; let ParserMethod = "parsePostIdxReg"; } -def am2offset_reg : Operand<i32>, +def am2offset_reg : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -894,7 +897,7 @@ def am2offset_reg : Operand<i32>, // FIXME: am2offset_imm should only need the immediate, not the GPR. Having // the GPR is purely vestigial at this point.
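The ARMISelLowering.cpp hunk earlier in this file set (LowerSELECT_CC) is worth restating, since its rewritten comment encodes a small decision table: with select c, a, b taking its operands in the same order as c = fcmp a, b, a greater-than style comparison keeps the larger operand (VMAXNM) and a less-than style one the smaller (VMINNM), and each choice flips when the select operands arrive swapped. A compact sketch of that table, with hypothetical enums standing in for the ISD/ARMISD values:

enum CondCode { SETGT, SETGE, SETLT, SETLE, SETOther };
enum MaxMinOp { VMAXNM, VMINNM, NoMaxMin };

// Models: c = fcmp CC a, b; select c, TrueVal, FalseVal.
// Swapped == false means (TrueVal, FalseVal) == (a, b); true means (b, a).
static MaxMinOp chooseMaxMin(CondCode CC, bool Swapped) {
  bool IsGT = (CC == SETGT || CC == SETGE);
  bool IsLT = (CC == SETLT || CC == SETLE);
  if (!IsGT && !IsLT)
    return NoMaxMin;                  // only plain gt/ge/lt/le qualify
  if (IsGT)
    return Swapped ? VMINNM : VMAXNM; // select keeps the larger operand
  return Swapped ? VMAXNM : VMINNM;   // select keeps the smaller operand
}

The UnsafeFPMath guard in the real code exists because this table is only value-correct when NaNs and signed zeros may be ignored, exactly as the FIXME in that hunk spells out.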
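The other recurring edit in these TableGen files, starting with the new MemOperand class in the hunk above and continuing through the defs below, is purely declarative: MemOperand sets OperandType to "OPERAND_MEMORY", so generic MC-layer code can recognize a memory operand by its class instead of enumerating every target addressing mode by name. On the consumer side that reduces to a check of roughly this shape (a sketch with a hypothetical stand-in for MCOI::OperandType, not the real MC structures):

enum OperandType { OPERAND_UNKNOWN, OPERAND_IMMEDIATE, OPERAND_REGISTER,
                   OPERAND_MEMORY };

struct OperandInfo {
  OperandType Type; // filled in from the TableGen OperandType string
};

// Generic code can now ask "is this a memory reference?" without knowing
// about addrmode2, t2addrmode_imm8, or any other target-specific operand.
static bool isMemoryOperand(const OperandInfo &Info) {
  return Info.Type == OPERAND_MEMORY;
}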
def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } -def am2offset_imm : Operand<i32>, +def am2offset_imm : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -909,7 +912,7 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -class AddrMode3 : Operand<i32>, +class AddrMode3 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let ParserMatchClass = AddrMode3AsmOperand; @@ -932,7 +935,7 @@ def AM3OffsetAsmOperand : AsmOperandClass { let Name = "AM3Offset"; let ParserMethod = "parseAM3Offset"; } -def am3offset : Operand<i32>, +def am3offset : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode3Offset", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode3OffsetOpValue"; @@ -951,7 +954,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -class AddrMode5 : Operand<i32>, +class AddrMode5 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; @@ -970,7 +973,7 @@ def addrmode5_pre : AddrMode5 { // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : Operand<i32>, +def addrmode6 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -979,7 +982,7 @@ def addrmode6 : Operand<i32>, let ParserMatchClass = AddrMode6AsmOperand; } -def am6offset : Operand<i32>, +def am6offset : MemOperand, ComplexPattern<i32, 1, "SelectAddrMode6Offset", [], [SDNPWantRoot]> { let PrintMethod = "printAddrMode6OffsetOperand"; @@ -990,7 +993,7 @@ def am6offset : Operand<i32>, // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 // (single element from one lane) for size 32. -def addrmode6oneL32 : Operand<i32>, +def addrmode6oneL32 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -998,7 +1001,7 @@ def addrmode6oneL32 : Operand<i32>, } // Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : Operand<i32>, +class AddrMode6Align : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -1074,7 +1077,7 @@ def addrmode6align64or128or256 : AddrMode6Align { // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. -def addrmode6dup : Operand<i32>, +def addrmode6dup : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1085,7 +1088,7 @@ def addrmode6dup : Operand<i32>, } // Base class for addrmode6dup with specific alignment restrictions. 
-class AddrMode6DupAlign : Operand<i32>, +class AddrMode6DupAlign : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1149,7 +1152,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign { // addrmodepc := pc + reg // -def addrmodepc : Operand<i32>, +def addrmodepc : MemOperand, ComplexPattern<i32, 2, "SelectAddrModePC", []> { let PrintMethod = "printAddrModePCOperand"; let MIOperandInfo = (ops GPR, i32imm); @@ -1158,7 +1161,7 @@ def addrmodepc : Operand<i32>, // addr_offset_none := reg // def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } -def addr_offset_none : Operand<i32>, +def addr_offset_none : MemOperand, ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> { let PrintMethod = "printAddrMode7Operand"; let DecoderMethod = "DecodeAddrMode7Operand"; @@ -1417,7 +1420,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", [(opnode GPR:$Rn, mod_imm:$imm)]>, @@ -1445,6 +1449,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + let DecoderMethod = rrDecoderMethod; let Unpredictable{15-12} = 0b1111; } @@ -4263,6 +4268,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; //===----------------------------------------------------------------------===// +// ARMv8.1a Privileged Access Never extension +// +// SETPAN #imm1 + +def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", + "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010001; + let Inst{19-16} = 0b0000; + let Inst{15-10} = 0b000000; + let Inst{9} = imm; + let Inst{8} = 0b0; + let Inst{7-4} = 0b0000; + let Inst{3-0} = 0b0000; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{15-10} = 0b111111; + let Unpredictable{8} = 0b1; + let Unpredictable{3-0} = 0b1111; +} + +//===----------------------------------------------------------------------===// // Comparison Instructions... // @@ -4366,7 +4395,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), // Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, + "DecodeTSTInstruction">; defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 3c62e0e..d0ade77 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -142,7 +142,7 @@ def t_blxtarget : Operand<i32> { // t_addrmode_pc := <label> => pc + imm8 * 4 // -def t_addrmode_pc : Operand<i32> { +def t_addrmode_pc : MemOperand { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; let PrintMethod = "printThumbLdrLabelOperand"; @@ -153,7 +153,7 @@ def t_addrmode_pc : Operand<i32> { // t_addrmode_rr := reg + reg // def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; } -def t_addrmode_rr : Operand<i32>, +def t_addrmode_rr : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -169,7 +169,7 @@ def t_addrmode_rr : Operand<i32>, // the reg+imm forms will match instead. This is a horrible way to do that, // as it forces tight coupling between the methods, but it's how selectiondag // currently works. -def t_addrmode_rrs1 : Operand<i32>, +def t_addrmode_rrs1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -177,7 +177,7 @@ def t_addrmode_rrs1 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs2 : Operand<i32>, +def t_addrmode_rrs2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -185,7 +185,7 @@ def t_addrmode_rrs2 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs4 : Operand<i32>, +def t_addrmode_rrs4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -197,7 +197,7 @@ def t_addrmode_rrs4 : Operand<i32>, // t_addrmode_is4 := reg + imm5 * 4 // def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; } -def t_addrmode_is4 : Operand<i32>, +def t_addrmode_is4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -209,7 +209,7 @@ def t_addrmode_is4 : Operand<i32>, // t_addrmode_is2 := reg + imm5 * 2 // def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; } -def t_addrmode_is2 : Operand<i32>, +def t_addrmode_is2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -221,7 +221,7 @@ def t_addrmode_is2 : Operand<i32>, // t_addrmode_is1 := reg + imm5 // def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; } -def t_addrmode_is1 : Operand<i32>, +def t_addrmode_is1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { let 
EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -235,7 +235,7 @@ def t_addrmode_is1 : Operand<i32>, // FIXME: This really shouldn't have an explicit SP operand at all. It should // be implicit, just like in the instruction encoding itself. def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; } -def t_addrmode_sp : Operand<i32>, +def t_addrmode_sp : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let EncoderMethod = "getAddrModeThumbSPOpValue"; let DecoderMethod = "DecodeThumbAddrModeSP"; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 10b0a0e..103ee00 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -148,7 +148,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ // t2addrmode_imm12 := reg + imm12 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} -def t2addrmode_imm12 : Operand<i32>, +def t2addrmode_imm12 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; @@ -178,7 +178,7 @@ def t2adrlabel : Operand<i32> { // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} -def t2addrmode_posimm8 : Operand<i32> { +def t2addrmode_posimm8 : MemOperand { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -188,7 +188,7 @@ def t2addrmode_posimm8 : Operand<i32> { // t2addrmode_negimm8 := reg - imm8 def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} -def t2addrmode_negimm8 : Operand<i32>, +def t2addrmode_negimm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; @@ -199,7 +199,7 @@ def t2addrmode_negimm8 : Operand<i32>, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -class T2AddrMode_Imm8 : Operand<i32>, +class T2AddrMode_Imm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -215,7 +215,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 { let PrintMethod = "printT2AddrModeImm8Operand<true>"; } -def t2am_imm8_offset : Operand<i32>, +def t2am_imm8_offset : MemOperand, ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", [], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; @@ -225,7 +225,7 @@ def t2am_imm8_offset : Operand<i32>, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : Operand<i32> { +class T2AddrMode_Imm8s4 : MemOperand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -241,7 +241,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { } def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } -def t2am_imm8s4_offset : Operand<i32> { +def t2am_imm8s4_offset : MemOperand { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; let EncoderMethod = "getT2Imm8s4OpValue"; let DecoderMethod = "DecodeT2Imm8S4"; @@ -251,7 +251,7 @@ def t2am_imm8s4_offset : Operand<i32> { def MemImm0_1020s4OffsetAsmOperand : 
AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand<i32>, +def t2addrmode_imm0_1020s4 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; @@ -262,7 +262,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>, // t2addrmode_so_reg := reg + (reg << imm2) def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";} -def t2addrmode_so_reg : Operand<i32>, +def t2addrmode_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; let EncoderMethod = "getT2AddrModeSORegOpValue"; @@ -273,13 +273,13 @@ def t2addrmode_so_reg : Operand<i32>, // Addresses for the TBB/TBH instructions. def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; } -def addrmode_tbb : Operand<i32> { +def addrmode_tbb : MemOperand { let PrintMethod = "printAddrModeTBB"; let ParserMatchClass = addrmode_tbb_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); } def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; } -def addrmode_tbh : Operand<i32> { +def addrmode_tbh : MemOperand { let PrintMethod = "printAddrModeTBH"; let ParserMatchClass = addrmode_tbh_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); @@ -3630,8 +3630,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { +def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>, + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -4281,6 +4281,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, //===----------------------------------------------------------------------===// +// ARMv8.1 Privileged Access Never extension +// +// SETPAN #imm1 + +def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>, + T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{4} = 0b1; + let Inst{3} = imm; + let Inst{2-0} = 0b000; + + let Unpredictable{4} = 0b1; + let Unpredictable{2-0} = 0b111; +} + +//===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fbec9e6..2a3e1da 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,6 +133,7 @@ void ARMSubtarget::initializeEnvironment() { HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; + HasV8_1aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -166,7 +167,6 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; - HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; @@ -191,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -254,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops()
? true : false; + RestrictIT = hasV8Ops(); break; case RestrictedIT: RestrictIT = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f36cd5c..d82314d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, + CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -67,6 +67,7 @@ protected: bool HasV6T2Ops; bool HasV7Ops; bool HasV8Ops; + bool HasV8_1aOps; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. @@ -182,9 +183,6 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; - /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions - bool HasV8_1a; - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -295,6 +293,7 @@ public: bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } + bool hasV8_1aOps() const { return HasV8_1aOps; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } @@ -316,7 +315,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1bee1b0..ae33340 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -42,6 +42,11 @@ EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, cl::desc("Enable ARM load/store optimization pass"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("arm-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -332,7 +337,9 @@ void ARMPassConfig::addIRPasses() { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. 
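The EnableGlobalMerge option introduced in the ARMTargetMachine.cpp hunk above is declared as cl::opt<cl::boolOrDefault> rather than a plain bool so that "not given on the command line" stays distinguishable from an explicit choice: unset defers to the optimization-level heuristic, while an explicit true or false overrides it in either direction. The same three-state logic in isolation (a standalone sketch, not the actual pass-config code):

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; // mirrors llvm::cl

// The heuristic enables the pass only at aggressive optimization levels;
// an explicit flag value wins over the heuristic in both directions.
static bool shouldRunGlobalMerge(bool AggressiveOpt, BoolOrDefault Flag) {
  if (Flag == BOU_TRUE)
    return true;
  if (Flag == BOU_FALSE)
    return false;
  return AggressiveOpt; // BOU_UNSET: fall back to the heuristic
}

This is equivalent to the condition in the hunk, (Aggressive && unset) || explicitly-true, with the explicit-false case falling through to "don't run".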
We may want // to look into using the old value for non-thumb1 code of diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2215efb..b9ad2c8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,8 +276,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } - bool hasV8_1a() const { - return STI.getFeatureBits() & ARM::FeatureV8_1a; + bool hasV8_1aOps() const { + return STI.getFeatureBits() & ARM::HasV8_1aOps; } void SwitchMode() { @@ -5418,47 +5418,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, /// inclusion of carry set or predication code operands. // // FIXME: It would be nice to autogen this. -void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, - bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { - if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || +void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, + bool &CanAcceptPredicationCode) { + CanAcceptCarrySet = + Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || - Mnemonic == "add" || Mnemonic == "adc" || - Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || - Mnemonic == "orr" || Mnemonic == "mvn" || - Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || - Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || - Mnemonic == "vfm" || Mnemonic == "vfnm" || - (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || - Mnemonic == "mla" || Mnemonic == "smlal" || - Mnemonic == "umlal" || Mnemonic == "umull"))) { - CanAcceptCarrySet = true; - } else - CanAcceptCarrySet = false; + Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" || + Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" || + Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || + Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || + Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" || + (!isThumb() && + (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || + Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull")); if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || - Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || - Mnemonic.startswith("vsel") || - Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || - Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || - Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || + Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || + Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || + Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || + Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } 
else if (!isThumb()) { // Some instructions are only predicable in Thumb mode - CanAcceptPredicationCode - = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + CanAcceptPredicationCode = + Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && - Mnemonic != "ldc2" && Mnemonic != "ldc2l" && - Mnemonic != "stc2" && Mnemonic != "stc2l" && - !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" && + Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") && + !Mnemonic.startswith("srs"); } else if (isThumbOne()) { if (hasV6MOps()) CanAcceptPredicationCode = Mnemonic != "movs"; @@ -6153,6 +6150,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "destination operands can't be identical"); return false; } + case ARM::t2BXJ: { + const unsigned RmReg = Inst.getOperand(0).getReg(); + // Rm = SP is no longer unpredictable in v8-A + if (RmReg == ARM::SP && !hasV8Ops()) + return Error(Operands[2]->getStartLoc(), + "r13 (SP) is an unpredictable operand to BXJ"); + return false; + } case ARM::STRD: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d5122a..4c169a8 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -91,7 +91,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() {} + ~ARMDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -106,7 +106,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() {} + ~ThumbDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, @@ -2119,6 +2123,54 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + + if (Pred == 0xF) + return DecodeSETPANInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; 
+} + +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Imm = fieldFromInstruction(Insn, 9, 1); + + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); + if ((FeatureBits & ARM::HasV8_1aOps) == 0 || + (FeatureBits & ARM::HasV8Ops) == 0) + return MCDisassembler::Fail; + + // Decoder can be called from DecodeTST, which does not check that the full + // encoding is valid. + if (fieldFromInstruction(Insn, 20, 12) != 0xf11 || + fieldFromInstruction(Insn, 4, 4) != 0) + return MCDisassembler::Fail; + if (fieldFromInstruction(Insn, 10, 10) != 0 || + fieldFromInstruction(Insn, 0, 4) != 0) + S = MCDisassembler::SoftFail; + + Inst.setOpcode(ARM::SETPAN); + Inst.addOperand(MCOperand::CreateImm(Imm)); + + return S; +} + static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e15323d..c2e1b2a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) { /// Prints the shift value with an immediate value. static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, - unsigned ShImm, bool UseMarkup) { + unsigned ShImm, bool UseMarkup) { if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) return; O << ", "; - assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); + assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); O << getShiftOpcStr(ShOpc); if (ShOpc != ARM_AM::rrx) { @@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, } } -ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} +ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << markup("<reg:") - << getRegisterName(RegNo) - << markup(">"); + OS << markup("<reg:") << getRegisterName(RegNo) << markup(">"); } void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); - switch(Opcode) { + switch (Opcode) { // Check for HINT instructions w/ canonical names. case ARM::HINT: case ARM::tHINT: case ARM::t2HINT: switch (MI->getOperand(0).getImm()) { - case 0: O << "\tnop"; break; - case 1: O << "\tyield"; break; - case 2: O << "\twfe"; break; - case 3: O << "\twfi"; break; - case 4: O << "\tsev"; break; + case 0: + O << "\tnop"; + break; + case 1: + O << "\tyield"; + break; + case 2: + O << "\twfe"; + break; + case 3: + O << "\twfi"; + break; + case 4: + O << "\tsev"; + break; + case 5: - if ((getAvailableFeatures() & ARM::HasV8Ops)) { + if ((STI.getFeatureBits() & ARM::HasV8Ops)) { O << "\tsevl"; break; } // Fallthrough for non-v8 default: // Anything else should just print normally.
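The pair of decoders above relies on one encoding trick: SETPAN occupies what would otherwise be a TST encoding whose condition field is 0b1111, a value ARM reserves for special unconditional encodings, so DecodeTSTInstruction peels that case off and re-dispatches to DecodeSETPANInstruction. Everything rests on fieldFromInstruction-style bit extraction, which is simple enough to sketch standalone (simplified helper, not LLVM's templated version):

#include <cassert>
#include <cstdint>

// Extract NumBits bits of Insn starting at StartBit (bit 0 = LSB).
static uint32_t field(uint32_t Insn, unsigned StartBit, unsigned NumBits) {
  assert(StartBit + NumBits <= 32 && "field out of range");
  uint32_t Mask = (NumBits == 32) ? ~0u : ((1u << NumBits) - 1);
  return (Insn >> StartBit) & Mask;
}

// True when a word in the TST encoding space is really a SETPAN, combining
// the predicate test from DecodeTST with the fixed-bits test from
// DecodeSETPAN above (the two overlap deliberately, as they do there).
static bool looksLikeSetPan(uint32_t Insn) {
  return field(Insn, 28, 4) == 0xF && field(Insn, 20, 12) == 0xF11;
}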
- printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); return; } - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); if (Opcode == ARM::t2HINT) O << ".w"; printAnnotation(O, Annot); @@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO3 = MI->getOperand(3); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); - printSBitModifierOperand(MI, 6, O); - printPredicateOperand(MI, 4, O); + printSBitModifierOperand(MI, 6, STI, O); + printPredicateOperand(MI, 4, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO2 = MI->getOperand(2); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())); - printSBitModifierOperand(MI, 5, O); - printPredicateOperand(MI, 3, O); + printSBitModifierOperand(MI, 5, STI, O); + printPredicateOperand(MI, 3, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - O << ", " - << markup("<imm:") - << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) - << markup(">"); + O << ", " << markup("<imm:") << "#" + << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">"); printAnnotation(O, Annot); return; } @@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print PUSH if there are at least two registers in the list. O << '\t' << "push"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2STMDB_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(3).getImm() == -4) { O << '\t' << "push"; - printPredicateOperand(MI, 4, O); + printPredicateOperand(MI, 4, STI, O); O << "\t{"; printRegName(O, MI->getOperand(1).getReg()); O << "}"; @@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print POP if there are at least two registers in the list. 
O << '\t' << "pop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2LDMIA_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(4).getImm() == 4) { O << '\t' << "pop"; - printPredicateOperand(MI, 5, O); + printPredicateOperand(MI, 5, STI, O); O << "\t{"; printRegName(O, MI->getOperand(0).getReg()); O << "}"; @@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VSTMDDB_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpush"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VLDMDIA_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\tldm"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); O << '\t'; printRegName(O, BaseReg); - if (Writeback) O << "!"; + if (Writeback) + O << "!"; O << ", "; - printRegisterList(MI, 3, O); + printRegisterList(MI, 3, STI, O); printAnnotation(O, Annot); return; } @@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // GPRs. However, when decoding them, the two GRPs cannot be automatically // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. - case ARM::LDREXD: case ARM::STREXD: - case ARM::LDAEXD: case ARM::STLEXD: { - const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); + case ARM::LDREXD: + case ARM::STREXD: + case ARM::LDAEXD: + case ARM::STLEXD: { + const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID); bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); if (MRC.contains(Reg)) { @@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (isStore) NewMI.addOperand(MI->getOperand(0)); - NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0, - &MRI.getRegClass(ARM::GPRPairRegClassID))); + NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg( + Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID))); NewMI.addOperand(NewReg); // Copy the rest operands into NewMI. - for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i) + for (unsigned i = isStore ? 
3 : 2; i < MI->getNumOperands(); ++i) NewMI.addOperand(MI->getOperand(i)); - printInstruction(&NewMI, O); + printInstruction(&NewMI, STI, O); return; } break; } - // B9.3.3 ERET (Thumb) - // For a target that has Virtualization Extensions, ERET is the preferred - // disassembly of SUBS PC, LR, #0 + // B9.3.3 ERET (Thumb) + // For a target that has Virtualization Extensions, ERET is the preferred + // disassembly of SUBS PC, LR, #0 case ARM::t2SUBS_PC_LR: { - if (MI->getNumOperands() == 3 && - MI->getOperand(0).isImm() && + if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0 && - (getAvailableFeatures() & ARM::FeatureVirtualization)) { + (STI.getFeatureBits() & ARM::FeatureVirtualization)) { O << "\teret"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); printAnnotation(O, Annot); return; } @@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } - printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); printRegName(O, Reg); } else if (Op.isImm()) { - O << markup("<imm:") - << '#' << formatImm(Op.getImm()) - << markup(">"); + O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); const MCExpr *Expr = Op.getExpr(); @@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) { @@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); + O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); } else { - O << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } @@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, // REG REG 0,SH_OPC - e.g. R5, ROR R3 // REG 0 IMM,SH_OPC - e.g. 
R5, LSL #3 void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); printRegName(O, MO1.getReg()); @@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); printRegName(O, MO1.getReg()); @@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } - //===--------------------------------------------------------------------===// // Addressing Mode #2 //===--------------------------------------------------------------------===// void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (!MO2.getReg()) { if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. - O << ", " - << markup("<imm:") - << "#" + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()) - << markup(">"); + << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">"); } O << "]" << markup(">"); return; @@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
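One formatting detail recurs through all of these printer hunks: immediates and memory operands are bracketed by markup("<imm:") ... markup(">") calls. The markup helper emits its tag text only when markup output has been requested, so tools that want structured operands can opt in while ordinary assembly output is byte-for-byte unchanged. A toy version of the mechanism (a sketch of the idea, not the LLVM class):

#include <cstdio>

struct MiniPrinter {
  bool UseMarkup = false; // assumption: toggled by the client

  const char *markup(const char *Tag) const { return UseMarkup ? Tag : ""; }

  void printImm(int Imm) const {
    std::printf("%s#%d%s\n", markup("<imm:"), Imm, markup(">"));
  }
};

int main() {
  MiniPrinter P;
  P.printImm(42); // prints "#42"
  P.UseMarkup = true;
  P.printImm(42); // prints "<imm:#42>"
}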
+ printOperand(MI, Op, STI, O); return; } #ifndef NDEBUG - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO3 = MI->getOperand(Op + 2); unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); - assert(IdxMode != ARMII::IndexModePost && - "Should be pre or offset index op"); + assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op"); #endif - printAM2PreOrOffsetIndexOp(MI, Op, O); + printAM2PreOrOffsetIndexOp(MI, Op, STI, O); } void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs << markup(">"); return; } @@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << '['; printRegName(O, MO1.getReg()); @@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, return; } - //If the op is sub we have to print the immediate even if it is 0 + // If the op is sub we have to print the immediate even if it is 0 unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(op) - << ImmOffs + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs << markup(">"); } O << ']' << markup(">"); @@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, Op, STI, O); return; } @@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (MO1.getReg()) { O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())); @@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs << markup(">"); } -void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << (MO2.getImm() ? "" : "-"); printRegName(O, MO1.getReg()); } -void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) << markup(">"); } - void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) - .getImm()); + ARM_AM::AMSubMode Mode = + ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm()); O << ARM_AM::getAMSubModeStr(Mode); } template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -639,20 +636,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(Op) - << ImmOffs * 4 - << markup(">"); + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op) + << ImmOffs * 4 << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); O << markup("<mem:") << "["; @@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.getReg() == 0) @@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = countTrailingZeros(v); - int32_t width = (32 - countLeadingZeros (v)) - lsb; + int32_t width = (32 - countLeadingZeros(v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << markup("<imm:") << '#' << lsb << markup(">") - << ", " - << markup("<imm:") << '#' << width << markup(">"); + O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:") + << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops)); + O << ARM_MB::MemBOptToString(val, (STI.getFeatureBits() & ARM::HasV8Ops)); } void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); O << ARM_ISB::InstSyncBOptToString(val); } void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; if (isASR) { - O << ", asr " - << markup("<imm:") - << "#" << (Amt == 0 ? 32 : Amt) - << markup(">"); - } - else if (Amt) { - O << ", lsl " - << markup("<imm:") - << "#" << Amt + O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 
32 : Amt) << markup(">"); + } else if (Amt) { + O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">"); } } void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) @@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); // A shift amount of 32 is encoded as 0. @@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum) O << ", "; + if (i != OpNum) + O << ", "; printRegName(O, MI->getOperand(i).getReg()); } O << "}"; } void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0)); @@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1)); } - void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); if (Op.getImm()) @@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); O << ARM_PROC::IModToString(Op.getImm()); } void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned IFlags = Op.getImm(); - for (int i=2; i >= 0; --i) + for (int i = 2; i >= 0; --i) if (IFlags & (1 << i)) O << ARM_PROC::IFlagsToString(1 << i); @@ -792,11 +790,12 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; - uint64_t FeatureBits = getAvailableFeatures(); + uint64_t FeatureBits = STI.getFeatureBits(); if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); @@ -805,14 +804,30 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // For writes, handle extended mask bits if the DSP extension is present. 
if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { switch (SYSm) { - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; + case 0x400: + O << "apsr_g"; + return; + case 0xc00: + O << "apsr_nzcvqg"; + return; + case 0x401: + O << "iapsr_g"; + return; + case 0xc01: + O << "iapsr_nzcvqg"; + return; + case 0x402: + O << "eapsr_g"; + return; + case 0xc02: + O << "eapsr_nzcvqg"; + return; + case 0x403: + O << "xpsr_g"; + return; + case 0xc03: + O << "xpsr_nzcvqg"; + return; } } @@ -823,29 +838,66 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an // alias for MSR APSR_nzcvq. switch (SYSm) { - case 0: O << "apsr_nzcvq"; return; - case 1: O << "iapsr_nzcvq"; return; - case 2: O << "eapsr_nzcvq"; return; - case 3: O << "xpsr_nzcvq"; return; + case 0: + O << "apsr_nzcvq"; + return; + case 1: + O << "iapsr_nzcvq"; + return; + case 2: + O << "eapsr_nzcvq"; + return; + case 3: + O << "xpsr_nzcvq"; + return; } } switch (SYSm) { - default: llvm_unreachable("Unexpected mask value!"); - case 0: O << "apsr"; return; - case 1: O << "iapsr"; return; - case 2: O << "eapsr"; return; - case 3: O << "xpsr"; return; - case 5: O << "ipsr"; return; - case 6: O << "epsr"; return; - case 7: O << "iepsr"; return; - case 8: O << "msp"; return; - case 9: O << "psp"; return; - case 16: O << "primask"; return; - case 17: O << "basepri"; return; - case 18: O << "basepri_max"; return; - case 19: O << "faultmask"; return; - case 20: O << "control"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 0: + O << "apsr"; + return; + case 1: + O << "iapsr"; + return; + case 2: + O << "eapsr"; + return; + case 3: + O << "xpsr"; + return; + case 5: + O << "ipsr"; + return; + case 6: + O << "epsr"; + return; + case 7: + O << "iepsr"; + return; + case 8: + O << "msp"; + return; + case 9: + O << "psp"; + return; + case 16: + O << "primask"; + return; + case 17: + O << "basepri"; + return; + case 18: + O << "basepri_max"; + return; + case 19: + O << "faultmask"; + return; + case 20: + O << "control"; + return; } } @@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: llvm_unreachable("Unexpected mask value!"); - case 4: O << "g"; return; - case 8: O << "nzcvq"; return; - case 12: O << "nzcvqg"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 4: + O << "g"; + return; + case 8: + O << "nzcvq"; + return; + case 12: + O << "nzcvqg"; + return; } } @@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (Mask) { O << '_'; - if (Mask & 8) O << 'f'; - if (Mask & 4) O << 's'; - if (Mask & 2) O << 'x'; - if (Mask & 1) O << 'c'; + if (Mask & 8) + O << 'f'; + if (Mask & 4) + O << 's'; + if (Mask & 2) + O << 'x'; + if (Mask & 1) + O << 'c'; } } void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint32_t Banked = MI->getOperand(OpNum).getImm(); uint32_t R = (Banked & 0x20) >> 5; @@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned 
OpNum, if (R) { O << "SPSR_"; - switch(SysM) { - case 0x0e: O << "fiq"; return; - case 0x10: O << "irq"; return; - case 0x12: O << "svc"; return; - case 0x14: O << "abt"; return; - case 0x16: O << "und"; return; - case 0x1c: O << "mon"; return; - case 0x1e: O << "hyp"; return; - default: llvm_unreachable("Invalid banked SPSR register"); + switch (SysM) { + case 0x0e: + O << "fiq"; + return; + case 0x10: + O << "irq"; + return; + case 0x12: + O << "svc"; + return; + case 0x14: + O << "abt"; + return; + case 0x16: + O << "und"; + return; + case 0x1c: + O << "mon"; + return; + case 0x1e: + O << "hyp"; + return; + default: + llvm_unreachable("Invalid banked SPSR register"); } } assert(!R && "should have dealt with SPSR regs"); const char *RegNames[] = { - "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", - "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", - "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", - "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" - }; + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", + "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", + "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", + "sp_abt", "lr_und", "sp_und", "", "", "", "", + "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"}; const char *Name = RegNames[SysM]; assert(Name[0] && "invalid banked register operand"); @@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); // Handle the undefined 15 CC value here for printing so we don't abort(). 
@@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); O << ARMCondCodeToString(CC); } void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && @@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "p" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "c" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{" << MI->getOperand(OpNum).getImm() << "}"; } void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } -template<unsigned scale> +template <unsigned scale> void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isExpr()) { @@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - O << markup("<imm:") - << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) + O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm((Imm == 0 ? 32 : Imm)) + O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm)) << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(OpNum).getImm(); - unsigned Firstcond = MI->getOperand(OpNum-1).getImm(); + unsigned Firstcond = MI->getOperand(OpNum - 1).getImm(); unsigned CondBit0 = Firstcond & 1; unsigned NumTZ = countTrailingZeros(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); @@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op, STI, O); return; } @@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, STI, O); return; } O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(ImmOffs * Scale) + O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale) << markup(">"); } O << "]" << markup(">"); @@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 1); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1); } void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 2); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2); } void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } // Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 @@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, // REG 0 0 - e.g. R5 // REG IMM, SH_OPC - e.g. R5, LSL #3 void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); unsigned Reg = MO1.getReg(); printRegName(O, Reg); @@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); - } - else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(MO2.getImm() * 4) + O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4) << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup("<imm:"); @@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, O << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); @@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1266,71 +1342,74 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl " - << markup("<imm:") - << "#" << ShAmt - << markup(">"); + O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << markup("<imm:") - << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm()) << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << markup("<imm:") - << "#0x"; + O << 
markup("<imm:") << "#0x"; O.write_hex(Val); O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm(Imm + 1) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror " - << markup("<imm:") - << "#"; + O << ", ror " << markup("<imm:") << "#"; switch (Imm) { - default: assert (0 && "illegal ror immediate!"); - case 1: O << "8"; break; - case 2: O << "16"; break; - case 3: O << "24"; break; + default: + assert(0 && "illegal ror immediate!"); + case 1: + O << "8"; + break; + case 2: + O << "16"; + break; + case 3: + O << "24"; + break; } O << markup(">"); } void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { MCOperand Op = MI->getOperand(OpNum); // Support for fixups (MCFixup) if (Op.isExpr()) - return printOperand(MI, OpNum, O); + return printOperand(MI, OpNum, STI, O); unsigned Bits = Op.getImm() & 0xFF; unsigned Rot = (Op.getImm() & 0xF00) >> 7; - bool PrintUnsigned = false; - switch (MI->getOpcode()){ + bool PrintUnsigned = false; + switch (MI->getOpcode()) { case ARM::MOVi: // Movs to PC should be treated unsigned PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC); @@ -1354,36 +1433,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, } // Explicit #bits, #rot implied - O << "#" - << markup("<imm:") - << Bits - << markup(">") - << ", #" - << markup("<imm:") - << Rot - << markup(">"); + O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:") + << Rot << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 16 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 32 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1391,7 +1464,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); @@ -1402,8 +1476,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, O << "}"; } -void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + const 
MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1416,6 +1490,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, } void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1430,6 +1505,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1447,6 +1523,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1455,6 +1532,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1468,6 +1546,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1482,8 +1561,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, } void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1498,9 +1578,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListTwoSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); @@ -1511,24 +1591,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListThreeSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); - O << "[], "; + O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 2); O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 4); O << "[]}"; } -void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1545,6 +1625,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1558,9 +1639,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, O << "}"; } -void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f179e01..3927c9f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -24,146 +24,207 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + + void printSORegRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSORegImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printAddrModeTBB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrModeTBH(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const 
MCSubtargetInfo &STI, raw_ostream &O); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned scale> - void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale); void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - template<bool AlwaysPrintImm0> + void printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, + raw_ostream &O); void 
printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + + void printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIMod(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIFlag(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printRotImmOperand(const MCInst *MI, unsigned 
OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printPCLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListTwo(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 0b2e3b0..590d72f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { } } // end anonymous namespace -static unsigned getRelaxedOpcode(unsigned Op) { +unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { + 
bool HasThumb2 = STI->getFeatureBits() & ARM::FeatureThumb2; + switch (Op) { default: return Op; case ARM::tBcc: - return ARM::t2Bcc; + return HasThumb2 ? (unsigned)ARM::t2Bcc : Op; case ARM::tLDRpci: - return ARM::t2LDRpci; + return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op; case ARM::tADR: - return ARM::t2ADR; + return HasThumb2 ? (unsigned)ARM::t2ADR : Op; case ARM::tB: - return ARM::t2B; + return HasThumb2 ? (unsigned)ARM::t2B : Op; case ARM::tCBZ: return ARM::tHINT; case ARM::tCBNZ: @@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); if (Asm.isThumbFunc(&Sym)) Value |= 1; } @@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the basic blocks of the same function. Thus, we would like to resolve // the offset when the destination has the same MCFragment. if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); const MCSymbolData &SymData = Asm.getSymbolData(Sym); IsResolved = (SymData.getFragment() == DF); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index f4f1082..4fa8c79 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -47,6 +47,8 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; + unsigned getRelaxedOpcode(unsigned Op) const; + bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 3bd7ab7..ebef789 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -23,7 +23,7 @@ public: HasDataInCodeSupport = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 4efd325..263c4c4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -18,7 +18,7 @@ public: ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 33be347..f2c4358 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) : ARMAsmBackend(T, Triple, true) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter 
*createObjectWriter(raw_pwrite_stream &OS) const override { return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a821a6b..f4fedee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -32,7 +32,7 @@ namespace { public: ARMELFObjectWriter(uint8_t OSABI); - virtual ~ARMELFObjectWriter(); + ~ARMELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unimplemented"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } } else { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_1: switch (Modifier) { default: llvm_unreachable("unsupported Modifier"); @@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return Type; } -MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 9648ffa..e7c777e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -472,7 +472,7 @@ class ARMELFStreamer : public MCELFStreamer { public: friend class ARMTargetELFStreamer; - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { @@ -1083,14 +1083,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = nullptr; - if (const MCSymbol *Group = FnSection.getGroup()) { - EHSection = - getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP, - FnSection.getEntrySize(), Group->getName()); - } else { - EHSection = getContext().getELFSection(EHSecName, Type, Flags); - } + const MCSymbol *Group = FnSection.getGroup(); + if (Group) + Flags |= ELF::SHF_GROUP; + const MCSectionELF *EHSection = + getContext().getELFSection(EHSecName, Type, Flags, 0, Group, + FnSection.getUniqueID(), nullptr, &FnSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section @@ -1383,8 +1382,9 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, } MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool IsThumb) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, 
IsThumb); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index e48cabb..6b650f0 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -48,7 +48,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } - ~ARMMCCodeEmitter() {} + ~ARMMCCodeEmitter() override {} bool isThumb(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & ARM::ModeThumb) != 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 7ff7f9a..daa8af2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -255,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -310,27 +310,26 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, T.getArch() == Triple::thumb); } static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); } -static MCInstPrinter *createARMMCInstPrinter(const Target &T, +static MCInstPrinter *createARMMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MII, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 7e9ba66..24ca567 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -34,6 +34,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheARMLETarget, TheThumbLETarget; extern Target TheARMBETarget, TheThumbBETarget; @@ -41,9 +42,8 @@ extern Target TheARMBETarget, TheThumbBETarget; namespace ARM_MC { std::string ParseARMTriple(StringRef TT, StringRef CPU); - /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create an ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -83,24 +83,23 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI
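// The raw_ostream -> raw_pwrite_stream change threaded through these creation
// functions exists because object writers must back-patch bytes they emitted
// earlier (section sizes, symbol counts) once final layout is known, and
// raw_pwrite_stream adds pwrite() for exactly that. A minimal sketch of the
// idiom, not code from this commit (the placeholder layout is made up, and a
// real writer would use the endian helpers rather than a host-order integer):
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
static void writeWithBackpatch(llvm::raw_pwrite_stream &OS) {
  OS.write("\0\0\0\0", 4);  // placeholder for a 32-bit size field
  OS << "object file body"; // stream the real contents
  uint32_t Size = static_cast<uint32_t>(OS.tell());
  // Seek back and fill in the size field at offset 0.
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size), 0);
}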
// Construct a PE/COFF machine code streamer which will generate a PE/COFF // object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); -/// createARMELFObjectWriter - Construct an ELF Mach-O object writer. -MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, +/// Construct an ELF object writer. +MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian); -/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. -MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an ARM Mach-O object writer. +MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. +/// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3187d36..b1f9b58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_br: return false; // Handle 24-bit branch kinds. @@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; - // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br: - RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); - Log2Size = llvm::Log2_32(2); - return true; - case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: @@ -477,9 +472,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 2fd6445..166c04b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -26,7 +26,7 @@ public: : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { assert(!Is64Bit && "AArch64 support not yet implemented"); } - virtual ~ARMWinCOFFObjectWriter() { } + ~ARMWinCOFFObjectWriter() override {} unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -82,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { } namespace llvm { -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index dc707dc..b993b1b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -16,8 +16,8 @@ namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { public: ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; @@ -38,7 +38,8 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index b91b0e1..b2599fe 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -132,7 +132,7 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { } case ISD::FrameIndex: { - int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT VT = Node->getValueType(0); SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); unsigned Opc = BPF::MOV_rr; diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index d94416b..37f9164 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -63,11 +63,11 @@ public: std::string Str; raw_string_ostream OS(Str); - if (DLoc.isUnknown() == false) { - DILocation DIL(DLoc.getAsMDNode(Fn.getContext())); - StringRef Filename = 
DIL.getFilename(); - unsigned Line = DIL.getLineNumber(); - unsigned Column = DIL.getColumnNumber(); + if (DLoc) { + auto DIL = DLoc.get(); + StringRef Filename = DIL->getFilename(); + unsigned Line = DIL->getLine(); + unsigned Column = DIL->getColumn(); OS << Filename << ':' << Line << ':' << Column << ' '; } @@ -137,7 +137,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); @@ -538,12 +537,10 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, MachineBasicBlock * BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - unsigned Opc = MI->getOpcode(); - const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - assert(Opc == BPF::Select && "Unexpected instr type to insert"); + assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 47001f0..26b2cfe 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -231,8 +231,6 @@ class MOV_RI<string OpcodeStr> let BPFSrc = 0; // BPF_K let BPFClass = 7; // BPF_ALU64 } -def MOV_rr : MOV_RR<"mov">; -def MOV_ri : MOV_RI<"mov">; class LD_IMM64<bits<4> Pseudo, string OpcodeStr> : InstBPF<(outs GPR:$dst), (ins u64imm:$imm), @@ -255,7 +253,35 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr> let size = 3; // BPF_DW let BPFClass = 0; // BPF_LD } + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def LD_imm64 : LD_IMM64<0, "ld_64">; +def MOV_rr : MOV_RR<"mov">; +def MOV_ri : MOV_RI<"mov">; +} + +def LD_pseudo + : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), + "ld_pseudo\t$dst, $pseudo, $imm", + [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> { + + bits<3> mode; + bits<2> size; + bits<4> dst; + bits<64> imm; + bits<4> pseudo; + + let Inst{63-61} = mode; + let Inst{60-59} = size; + let Inst{51-48} = dst; + let Inst{55-52} = pseudo; + let Inst{47-32} = 0; + let Inst{31-0} = imm{31-0}; + + let mode = 0; // BPF_IMM + let size = 3; // BPF_DW + let BPFClass = 0; // BPF_LD +} // STORE instructions class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern> @@ -461,6 +487,33 @@ def XADD64 : XADD<3, "xadd64", atomic_load_add_64>; // undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>; } +// bswap16, bswap32, bswap64 +class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern> + : InstBPF<(outs GPR:$dst), (ins GPR:$src), + !strconcat(OpcodeStr, "\t$dst"), + Pattern> { + bits<4> op; + bits<1> BPFSrc; + bits<4> dst; + bits<32> imm; + + let Inst{63-60} = op; + let Inst{59} = BPFSrc; + let Inst{51-48} = dst; + let Inst{31-0} = imm; + + let op = 0xd; // BPF_END + let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target) + let BPFClass = 4; // BPF_ALU + let imm = SizeOp; +} + +let Constraints = "$dst = $src" in { +def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; +def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; +def BSWAP64 : 
BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>; +} + let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1, hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in { class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode> diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index 3f09379..05f6d82 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "BPFGenAsmWriter.inc" void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index d7c2899..adcaff6 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -25,7 +25,8 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 87c8077..8393135 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -31,7 +31,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -71,7 +71,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *(uint16_t *)&Data[Fixup.getOffset() + 2] = (uint16_t)((Value - 8) / 8); } -MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0); } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 169a8a7..a5562c1 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -47,7 +47,7 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI) { +MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 9c51d66..32d2ef5 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -76,9 +76,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, assert(MO.isExpr()); const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - assert(Kind == MCExpr::SymbolRef); + 
assert(Expr->getKind() == MCExpr::SymbolRef); if (MI.getOpcode() == BPF::JAL) // func call name @@ -125,7 +124,7 @@ void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted unsigned CurByte = 0; - if (Opcode == BPF::LD_imm64) { + if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); EmitByte(Value >> 56, CurByte, OS); EmitByte(((Value >> 48) & 0xff), CurByte, OS); diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index fd04001..95f0b02 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -63,15 +63,16 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } -static MCInstPrinter * -createBPFMCInstPrinter(const Target &T, unsigned SyntaxVariant, - const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { +static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new BPFInstPrinter(MAI, MII, MRI); return 0; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 1fd2bec..ce08b7c 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -28,6 +28,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheBPFTarget; @@ -38,7 +39,7 @@ MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // Defines symbolic names for BPF registers. This defines a mapping from diff --git a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt index 209d17c..8bca2e3 100644 --- a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = BPFDesc parent = BPF -required_libraries = MC BPFAsmPrinter BPFInfo +required_libraries = MC BPFAsmPrinter BPFInfo Support add_to_library_groups = BPF diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index d0e2010..f1a7127 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -15,6 +15,7 @@ #include "CPPTargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -91,6 +92,7 @@ namespace { /// CppWriter - This class is the main chunk of code that converts an LLVM /// module to a C++ translation unit. 
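// The CppWriter hunks below change the C++ backend from borrowing its
// formatted_raw_ostream to owning it: addPassesToEmitFile now wraps the
// raw_pwrite_stream it receives and hands ownership of the wrapper to the
// pass, so the stream cannot go away while the pass still holds a reference.
// The ownership pattern in isolation (a sketch; the 'Emitter' name is made
// up, not part of the backend):
#include "llvm/Support/FormattedStream.h"
#include <memory>
#include <utility>
struct Emitter {
  std::unique_ptr<llvm::formatted_raw_ostream> OutOwner; // owns the stream
  llvm::formatted_raw_ostream &Out; // alias so existing uses stay unchanged
  explicit Emitter(std::unique_ptr<llvm::formatted_raw_ostream> O)
      : OutOwner(std::move(O)), Out(*OutOwner) {}
};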
class CppWriter : public ModulePass { + std::unique_ptr<formatted_raw_ostream> OutOwner; formatted_raw_ostream &Out; const Module *TheModule; uint64_t uniqueNum; @@ -105,8 +107,9 @@ namespace { public: static char ID; - explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o) + : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0), + is_inline(false), indent_level(0) {} const char *getPassName() const override { return "C++ backend"; } @@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) { // initializers. if (GenerationType != GenFunction) { nl(Out) << "// Global Variable Definitions"; nl(Out); - for (const auto &GV : gvs) { + for (auto *GV : gvs) { if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) printVariableBody(Var); } @@ -2146,13 +2149,12 @@ char CppWriter::ID = 0; // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - PM.add(new CppWriter(o)); +bool CPPTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + if (FileType != TargetMachine::CGFT_AssemblyFile) + return true; + auto FOut = llvm::make_unique<formatted_raw_ostream>(o); + PM.add(new CppWriter(std::move(FOut))); return false; } diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 678a932..02d705e 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -29,7 +29,7 @@ struct CPPTargetMachine : public TargetMachine { : TargetMachine(T, "", TT, CPU, FS, Options) {} public: - bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, + bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) override; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index c6ffb96..758ccc7 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp HexagonCFGOptimizer.cpp HexagonCopyToCombine.cpp + HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 180762f..f0c81e0 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -222,21 +222,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return(new HexagonInstPrinter(MAI, MII, MRI)); - else - return nullptr; -} - extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); - - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - 
createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 0000000..37ed173 --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1348 @@ +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. +// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. 
If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void 
addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". 
This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. + LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. 
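/// For intuition (the indices are illustrative, not from a real dump): if the
/// updated interval for %vreg7 is [32r,64r)[96r,128B), the use of %vreg7 at
/// index 64 receives a kill flag, because that segment ends at a register
/// slot; nothing is marked at 128B, a block boundary, where the value simply
/// stays live across the edge.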
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such a case, that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning of other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX nor the predication is atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. + SmallVector<RegisterRef,2> Defs; + for (auto &Op : MI->operands()) + if (Op.isReg() && Op.isDef()) + Defs.push_back(RegisterRef(Op)); + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + unsigned DefR = Defs[i].Reg; + LiveInterval &LID = LIS->getInterval(DefR); + DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + // If MX falls inside of an existing live segment, terminate it.
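// (Illustration with made-up indices: inserting a def at 64r into an
// interval that holds [32r,96r) first truncates that segment to [32r,64r);
// the code below then adds a new segment starting at 64r, and both pieces
// are shrunk-to-uses afterwards.)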
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + if (LT != LID.end()) + terminateSegment(LT, MX, LID); + DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); + + // Create a new segment starting from MX. + LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); + SlotIndex EX; + VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); + if (N == LID.end()) { + // There is no live segment after MX. End this segment at the end of + // the block. + EX = LIS->getMBBEndIdx(MB); + } else { + // If the next segment starts at the block boundary, end the new segment + // at the boundary of the preceding block (i.e. the previous index). + // Otherwise, end the segment at the beginning of the next segment. In + // either case it will be "shrunk-to-uses" later. + EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + } + if (Predicated) { + // Predicated instruction will have an implicit use of the defined + // register. This is necessary so that this definition will not make + // any previous definitions dead. If there are no previous live + // segments, still add the implicit use, but make it "undef". + // Because of the implicit use, the preceding definition is not + // dead. Mark it as such (if necessary). + MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); + ImpUse.setSubReg(Defs[i].Sub); + bool Undef = false; + if (P == LID.end()) + Undef = true; + else { + // If the previous segment extends to the end of the previous block, + // the end index may actually be the beginning of this block. If + // the previous segment ends at a block boundary, move it back by one, + // to get the proper block for it. + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) + Undef = true; + } + if (!Undef) { + makeUndead(DefR, P->valno->def); + // We are adding a live use, so extend the previous segment to + // include it. + P->end = MX; + } else { + ImpUse.setIsUndef(true); + } + + if (!MI->readsRegister(DefR)) + MI->addOperand(ImpUse); + if (N != LID.end()) + makeDefined(DefR, N->start, true); + } + LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); + LID.addSegment(NR); + DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); + shrinkToUses(DefR, LID); + DEBUG(dbgs() << "updated imp-uses: " << *MI); + LID.verify(); + } + + // For each use in MI: + // - If there is no live segment that contains MX for the used register, + // extend the previous one. Ignore implicit uses. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + continue; + unsigned UseR = Op.getReg(); + LiveInterval &LIU = LIS->getInterval(UseR); + // Find the last segment P that starts before MX. + LiveInterval::iterator P = LIU.FindSegmentContaining(MX); + if (P == LIU.end()) + P = prevSegment(LIU, MX); + + assert(P != LIU.end() && "MI uses undefined register?"); + SlotIndex EX = P->end; + // If P contains MX, there is not much to do. + if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. 
+ SmallVector<unsigned,4> Uses; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.push_back(R); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + for (unsigned i = 0, n = Uses.size(); i < n; ++i) { + LiveInterval &LI = LIS->getInterval(Uses[i]); + shrinkToUses(Uses[i], LI); + } +} + + +/// Get the opcode for a conditional transfer of the value in SO (source +/// operand). The condition (true/false) is given in Cond. +unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, + bool Cond) { + using namespace Hexagon; + if (SO.isReg()) { + unsigned PhysR; + RegisterRef RS = SO; + if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { + const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); + PhysR = RS.Reg; + } + unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); + switch (RC->getSize()) { + case 4: + return Cond ? A2_tfrt : A2_tfrf; + case 8: + return Cond ? A2_tfrpt : A2_tfrpf; + } + llvm_unreachable("Invalid register operand"); + } + if (SO.isImm() || SO.isFPImm()) + return Cond ? C2_cmoveit : C2_cmoveif; + llvm_unreachable("Unexpected source operand"); +} + + +/// Generate a conditional transfer, copying the value SrcOp to the +/// destination register DstR:DstSR, and using the predicate register from +/// PredOp. The Cond argument specifies whether the predicate is to be +/// if(PredOp), or if(!PredOp). +MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp, + unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) { + MachineInstr *MI = SrcOp.getParent(); + MachineBasicBlock &B = *MI->getParent(); + MachineBasicBlock::iterator At = MI; + DebugLoc DL = MI->getDebugLoc(); + + // Don't avoid identity copies here (i.e. if the source and the destination + // are the same registers). It is actually better to generate them here, + // since this would cause the copy to potentially be predicated in the next + // step. The predication will remove such a copy if it is unable to + // predicate. + + unsigned Opc = getCondTfrOpcode(SrcOp, Cond); + MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, RegState::Define, DstSR) + .addOperand(PredOp) + .addOperand(SrcOp); + // We don't want any kills yet. + TfrI->clearKillInfo(); + DEBUG(dbgs() << "created an initial copy: " << *TfrI); + return TfrI; +} + + +/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function +/// performs all necessary changes to complete the replacement. +bool HexagonExpandCondsets::split(MachineInstr *MI) { + if (TfrLimitActive) { + if (TfrCounter >= TfrLimit) + return false; + TfrCounter++; + } + DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber() + << ": " << *MI); + MachineOperand &MD = MI->getOperand(0); // Definition + MachineOperand &MP = MI->getOperand(1); // Predicate register + assert(MD.isDef()); + unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + + // First, create the two individual conditional transfers, and add each + // of them to the live intervals information. Do that first and then remove + // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) + addInstrToLiveness(TfrT); + if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) + addInstrToLiveness(TfrF); + removeInstrFromLiveness(MI); + + return true; +} + + +/// Split all MUX instructions in the given block into pairs of conditional +/// transfers. +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + if (isCondset(I)) + Changed |= split(I); + } + return Changed; +} + + +bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { + if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + return false; + if (MI->hasUnmodeledSideEffects() || MI->mayStore()) + return false; + // Reject instructions with multiple defs (e.g. post-increment loads). + bool HasDef = false; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HasDef) + return false; + HasDef = true; + } + for (auto &Mo : MI->memoperands()) + if (Mo->isVolatile()) + return false; + return true; +} + + +/// Find the reaching definition for a predicated use of RD. The RD is used +/// under the conditions given by PredR and Cond, and this function will ignore +/// definitions that set RD under the opposite conditions. +MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { + MachineBasicBlock &B = *UseIt->getParent(); + MachineBasicBlock::iterator I = UseIt, S = B.begin(); + if (I == S) + return 0; + + bool PredValid = true; + do { + --I; + MachineInstr *MI = &*I; + // Check if this instruction can be ignored, i.e. if it is predicated + // on the complementary condition. + if (PredValid && HII->isPredicated(MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + continue; + } + + // Check the defs. If the PredR is defined, invalidate it. If RD is + // defined, return the instruction or 0, depending on the circumstances. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + RegisterRef RR = Op; + if (RR.Reg == PredR) { + PredValid = false; + continue; + } + if (RR.Reg != RD.Reg) + continue; + // If the "Reg" part agrees, there is still the subregister to check. + // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but + // not vreg1 (w/o subregisters). + if (RR.Sub == RD.Sub) + return MI; + if (RR.Sub == 0 || RD.Sub == 0) + return 0; + // We have different subregisters, so we can continue looking. + } + } while (I != S); + + return 0; +} + + +/// Check if the instruction MI can be safely moved over a set of instructions +/// whose side-effects (in terms of register defs and uses) are expressed in +/// the maps Defs and Uses. These maps reflect the conditional defs and uses +/// that depend on the same predicate register to allow moving instructions +/// over instructions predicated on the opposite condition. +bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, + ReferenceMap &Uses) { + // In order to be able to safely move MI over instructions that define + // "Defs" and use "Uses", no def operand from MI can be defined or used + // and no use operand can be defined. + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + RegisterRef RR = Op; + // For physical registers we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. 
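// (For example, predicating a hypothetical "%vreg3 = A2_add %vreg1, %vreg2"
// on %vregP yields "%vreg3 = A2_paddt %vregP, %vreg1, %vreg2": the def comes
// first, then the predicate register, then the original use operands.)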
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. 
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executes if TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
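The rewrite performed by predicate() and predicateAt() condenses to: pick the conditional opcode for (PredR, Cond), emit the new destination and the predicate register first, then copy the remaining explicit operands. A simplified restatement of that core, with the liveness bookkeeping omitted (it assumes this pass's RegisterRef and the getCondOpcode() helper that this patch adds to HexagonInstrInfo; a sketch, not the pass's actual code):

  // Sketch only: clone MI at Where as a predicated instruction defining RD.
  static MachineInstr *buildPredicatedClone(MachineInstr *MI, RegisterRef RD,
                                            unsigned PredR, bool Cond,
                                            const HexagonInstrInfo *HII,
                                            MachineBasicBlock::iterator Where) {
    MachineBasicBlock &B = *MI->getParent();
    unsigned PredOpc = HII->getCondOpcode(MI->getOpcode(), !Cond);
    MachineInstrBuilder MB =
        BuildMI(B, Where, Where->getDebugLoc(), HII->get(PredOpc));
    MB.addReg(RD.Reg, RegState::Define, RD.Sub); // new destination
    MB.addReg(PredR);                            // predicate operand
    for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef())
        continue;                                // defs are replaced by RD
      if (!MO.isReg() || !MO.isImplicit())
        MB.addOperand(MO);                       // keep explicit operands
    }
    return MB;
  }

+/// Predicate all cases of conditional copies in the specified block.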
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? 
"overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. 
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+ // This is done mainly to make the live interval update simpler than it
+ // would be while trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a2209ab..63900e0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -316,6 +316,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
 /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
 MachinePointerInfo(), MachinePointerInfo());
 }

@@ -1716,6 +1717,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 setOperationAction(ISD::SUBC, MVT::i32, Expand);
 setOperationAction(ISD::SUBC, MVT::i64, Expand);

+ // The only add and sub operations that detect overflow are the saturating
+ // ones.
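In other words, Hexagon's only flag-producing add/sub forms are the saturating ones, so the generic overflow nodes are expanded below and SelectionDAG re-derives the overflow bit from plain arithmetic. Roughly the semantics the expansion must preserve, as an illustrative sketch (not the actual legalizer output):

  #include <cstdint>

  // Reference semantics for an expanded unsigned add-with-overflow (UADDO).
  static inline bool uaddo32(uint32_t A, uint32_t B, uint32_t &Sum) {
    Sum = A + B;    // wraps modulo 2^32
    return Sum < A; // a wrapped sum is smaller than either input
  }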
+ for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + } + setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); @@ -2106,7 +2115,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // is Big Endian. unsigned OpIdx = NElts - i - 1; SDValue Operand = BVN->getOperand(OpIdx); - if (dyn_cast<ConstantSDNode>(Operand)) + if (isa<ConstantSDNode>(Operand)) // This operand is already in ConstVal. continue; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index fbf1ca9..ff4bcad 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask); } -int HexagonInstrInfo:: -getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : Hexagon::PredSense_true; @@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI, // This will change MI's opcode to its predicate version. // However, its operand list is still the old one, i.e. the // non-predicate one. - MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + MI->setDesc(get(getCondOpcode(Opc, invertJump))); int oper = -1; unsigned int GAIdx = 0; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2644248..284dde1 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -216,9 +216,7 @@ public: short getNonExtOpcode(const MachineInstr *MI) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; bool PredOpcodeHasNot(Opcode_t Opcode) const; - -private: - int getMatchingCondBranchOpcode(int Opc, bool sense) const; + int getCondOpcode(int Opc, bool sense) const; }; diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 1717ae3..d61cc54 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -72,7 +72,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()), + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() { diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 48b0bc8..0679866 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -27,11 +27,15 @@ using namespace llvm; static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", - cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); + cl::Hidden, 
cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + +static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Early expansion of MUX")); /// HexagonTargetMachineModule - Note that this is used on hosts that @@ -55,6 +59,10 @@ static MachineSchedRegistry SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); +namespace llvm { + FunctionPass *createHexagonExpandCondsets(); +} + /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -79,7 +87,15 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index bdccf88..155aa9e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -57,7 +57,7 @@ public: ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) : HexagonAsmBackend(T), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { StringRef CPU("HexagonV4"); return createHexagonELFObjectWriter(OS, OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 4a3ac8c..fde935b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -27,8 +27,8 @@ private: public: HexagonELFObjectWriter(uint8_t OSABI, StringRef C); - virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, - bool IsPCRel) const override; + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; }; } @@ -55,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, return Type; } -MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU) { MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); -}
\ No newline at end of file +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 6c87c9f..ec55234 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -78,7 +78,8 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { } void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. @@ -94,7 +95,7 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, Nop.setOpcode (Hexagon::A2_nop); HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); - printInst (&Nop, O, NoAnnot); + printInst (&Nop, O, NoAnnot, STI); } // Close the packet. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index d02243b..98fb99b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { MCRegisterInfo const &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override; + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); StringRef getRegName(unsigned RegNo) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index c63bf32..2e10d81 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -75,13 +75,16 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, X->InitMCCodeGenInfo(Reloc::Static, CM, OL); return X; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + +static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new HexagonInstPrinter(MAI, MII, MRI); + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return nullptr; } // Force static initialization. 
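The MC-layer churn in this patch follows one pattern across targets: object-writer factories now take a raw_pwrite_stream instead of a raw_ostream, and MCInstPrinter factories key on the Triple, drop their MCSubtargetInfo parameter, and return null for unsupported syntax variants. The post-patch shape, sketched for a hypothetical target Foo (all Foo names invented for illustration):

  static MCInstPrinter *createFooMCInstPrinter(const Triple &T,
                                               unsigned SyntaxVariant,
                                               const MCAsmInfo &MAI,
                                               const MCInstrInfo &MII,
                                               const MCRegisterInfo &MRI) {
    if (SyntaxVariant == 0)
      return new FooInstPrinter(MAI, MII, MRI);
    return nullptr; // Unsupported syntax variant.
  }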
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 17072d9..de63fd2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheHexagonTarget; @@ -40,8 +41,8 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - StringRef CPU); +MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, StringRef CPU); } // End llvm namespace diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index acf1214..6c43d97 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "MSP430GenAsmWriter.inc" void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 7fae505..70141a9 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 4c70803..775c0b2 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -58,12 +58,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, +static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new MSP430InstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 18141a6..08f41a8 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -645,6 +645,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } else { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 68868b6..9266c3b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,12 +102,6 @@ namespace llvm { const std::string &Constraint, MVT VT) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. - return InlineAsm::Constraint_m; - } - /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index 05352a2..c63a57c 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -62,7 +62,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: @@ -79,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 6401bc1..6f7e3c1 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -220,6 +220,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); + bool parseInsnDirective(); bool parseSetAtDirective(); bool parseSetNoAtDirective(); @@ -272,7 +273,10 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(SMLoc Loc); + /// Returns the internal register number for the current AT. Also checks if + /// the current AT is unavailable (set to $0) and gives an error if it is. + /// This should be used in pseudo-instruction expansions which need AT. 
+ unsigned getATReg(SMLoc Loc);

 bool processInstruction(MCInst &Inst, SMLoc IDLoc,
 SmallVectorImpl<MCInst> &Instructions);
@@ -1713,7 +1717,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // FIXME: gas has a special case for values that are 000...1111, which
 // becomes a li -1 and then a dsrl
 if (0 <= ImmValue && ImmValue <= 65535) {
- // For 0 <= j <= 65535.
+ // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
 // li d,j => ori d,$zero,j
 tmpInst.setOpcode(Mips::ORi);
 tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1721,7 +1725,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
 Instructions.push_back(tmpInst);
 } else if (ImmValue < 0 && ImmValue >= -32768) {
- // For -32768 <= j < 0.
+ // For negative signed 16-bit values (-32768 <= j < 0):
 // li d,j => addiu d,$zero,j
 tmpInst.setOpcode(Mips::ADDiu);
 tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1729,8 +1733,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
 Instructions.push_back(tmpInst);
 } else if ((ImmValue & 0xffffffff) == ImmValue) {
- // For any value of j that is representable as a 32-bit integer, create
- // a sequence of:
+ // For all other values which are representable as a 32-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,lo16(j)
 tmpInst.setOpcode(Mips::LUi);
@@ -1752,8 +1755,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // | 16 bits  | 16 bits  | 16 bits  |
 // |__________|__________|__________|
 //
- // For any value of j that is representable as a 48-bit integer, create
- // a sequence of:
+ // For any 64-bit value that is representable as a 48-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,hi16(lo32(j))
 // dsll d,d,16
@@ -1778,7 +1780,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // | 16 bits  | 16 bits  | 16 bits  | 16 bits  |
 // |__________|__________|__________|__________|
 //
- // For any value of j that isn't representable as a 48-bit integer.
+ // For all other values which are representable as a 64-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,lo16(hi32(j))
 // dsll d,d,16
@@ -2048,13 +2050,11 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
 if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
 TmpRegNum = RegOpNum;
 else {
- int AT = getATReg(IDLoc);
 // At this point we need AT to perform the expansions and we exit if it is
 // not available.
- if (!AT)
+ TmpRegNum = getATReg(IDLoc);
+ if (!TmpRegNum)
 return;
- TmpRegNum = getReg(
- (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
 }

 TempInst.setOpcode(Mips::LUi);
@@ -2078,12 +2078,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
 // Prepare TempInst for next instruction.
 TempInst.clear();
 // Add temp register to base.
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
+ if (BaseRegNum != Mips::ZERO) {
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ }
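For orientation, with an offset too large for 16 bits the expansion built here is (illustrative registers; AT is $1, and the low half is below 0x8000 so no carry adjustment is needed):

  lw $8, 0x12340($9)  =>  lui  $1, 0x1
                          addu $1, $1, $9
                          lw   $8, 0x2340($1)

With the guard just added, a $zero base skips the addu, leaving only the lui and the final load or store.

 // And finally, create original instruction with low part
 // of offset and new base.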
TempInst.setOpcode(Inst.getOpcode()); @@ -2383,11 +2385,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) { return CC; } -int MipsAsmParser::getATReg(SMLoc Loc) { - int AT = AssemblerOptions.back()->getATRegNum(); - if (AT == 0) +unsigned MipsAsmParser::getATReg(SMLoc Loc) { + unsigned ATIndex = AssemblerOptions.back()->getATRegNum(); + if (ATIndex == 0) { reportParseError(Loc, "pseudo-instruction requires $at, which is not available"); + return 0; + } + unsigned AT = getReg( + (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex); return AT; } @@ -2571,7 +2577,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { if (Tok.isNot(AsmToken::Identifier)) return true; - std::string Str = Tok.getIdentifier().str(); + std::string Str = Tok.getIdentifier(); Parser.Lex(); // Eat the identifier. // Now make an expression from the rest of the operand. @@ -3579,11 +3585,7 @@ bool MipsAsmParser::parseSetAssignment() { if (Parser.parseExpression(Value)) return reportParseError("expected valid expression after comma"); - // Check if the Name already exists as a symbol. - MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) - return reportParseError("symbol already defined"); - Sym = getContext().GetOrCreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); Sym->setVariableValue(Value); return false; @@ -4044,6 +4046,23 @@ bool MipsAsmParser::parseDirectiveOption() { return false; } +/// parseInsnDirective +/// ::= .insn +bool MipsAsmParser::parseInsnDirective() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + // The actual label marking happens in + // MipsELFStreamer::createPendingLabelRelocs(). + getTargetStreamer().emitDirectiveInsn(); + + getParser().Lex(); // Eat EndOfStatement token. 
+ return false; +} + /// parseDirectiveModule /// ::= .module oddspreg /// ::= .module nooddspreg @@ -4437,6 +4456,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".llvm_internal_mips_reallow_module_directive") return parseInternalDirectiveReallowModule(); + if (IDVal == ".insn") + return parseInsnDirective(); + return true; } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index aad549d..e80a47b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { switch (MI->getOpcode()) { default: break; diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 468dc07..713f35c 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -86,7 +86,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; bool printAliasInstr(const MCInst *MI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index acf6f21..dbcd867 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -155,7 +155,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } -MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter * +MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createMipsELFObjectWriter(OS, MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 243b73d..b3d5a49 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -36,7 +36,7 @@ public: bool Is64Bit) : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index a68bf16..8d9e3e3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,17 +23,33 @@ using namespace llvm; namespace { +// A helper structure based on ELFRelocationEntry, used for sorting entries in +// the relocation table. 
+struct MipsRelocationEntry { + MipsRelocationEntry(const ELFRelocationEntry &R) + : R(R), SortOffset(R.Offset), HasMatchingHi(false) {} + const ELFRelocationEntry R; + // SortOffset equals R.Offset except for the *HI16 relocations, for which it + // will be set based on the R.Offset of the matching *LO16 relocation. + int64_t SortOffset; + // True when this is a *LO16 relocation chosen as a match for a *HI16 + // relocation. + bool HasMatchingHi; +}; + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian); - virtual ~MipsELFObjectWriter(); + ~MipsELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; bool needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const override; + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) override; }; } @@ -225,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +// Sort entries by SortOffset in descending order. +// When there are more *HI16 relocs paired with one *LO16 reloc, the 2nd rule +// sorts them in ascending order of R.Offset. +static int cmpRelMips(const MipsRelocationEntry *AP, + const MipsRelocationEntry *BP) { + const MipsRelocationEntry &A = *AP; + const MipsRelocationEntry &B = *BP; + if (A.SortOffset != B.SortOffset) + return B.SortOffset - A.SortOffset; + if (A.R.Offset != B.R.Offset) + return A.R.Offset - B.R.Offset; + if (B.R.Type != A.R.Type) + return B.R.Type - A.R.Type; + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; +} + +// For the given Reloc.Type, return the matching relocation type, as in the +// table below. +static unsigned getMatchingLoType(const MCAssembler &Asm, + const ELFRelocationEntry &Reloc) { + unsigned Type = Reloc.Type; + if (Type == ELF::R_MIPS_HI16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_HI16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_HI16) + return ELF::R_MIPS16_LO16; + + const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol); + + if (MCELF::GetBinding(SD) != ELF::STB_LOCAL) + return ELF::R_MIPS_NONE; + + if (Type == ELF::R_MIPS_GOT16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_GOT16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_GOT16) + return ELF::R_MIPS16_LO16; + + return ELF::R_MIPS_NONE; +} + +// Return true if First needs a matching *LO16, its matching *LO16 type equals +// Second's type and both relocations are against the same symbol. +static bool areMatchingHiAndLo(const MCAssembler &Asm, + const ELFRelocationEntry &First, + const ELFRelocationEntry &Second) { + return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE && + getMatchingLoType(Asm, First) == Second.Type && + First.Symbol && First.Symbol == Second.Symbol; +} + +// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16. +static bool +isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index, + std::vector<MipsRelocationEntry> &MipsRelocs) { + return Index < MipsRelocs.size() - 1 && + areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R); +} + +// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16 +// and not chosen by a *HI16 as a match. 
+static bool isFreeLo(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi &&
+ !isPrecededByMatchingHi(Asm, Index, MipsRelocs);
+}
+
+// Lo is chosen as a match for Hi; set their fields accordingly.
+// Mips instructions have a fixed length of at least two bytes (two for
+// micromips/mips16, four for mips32/64), so we can set HI's SortOffset to
+// matching LO's Offset minus one to simplify the sorting function.
+static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
+ Lo.HasMatchingHi = true;
+ Hi.SortOffset = Lo.R.Offset - 1;
+}
+
+// We sort relocation table entries by offset, except for one additional rule
+// required by the MIPS ABI: every *HI16 relocation must be immediately
+// followed by the corresponding *LO16 relocation. We also support a GNU
+// extension that allows more *HI16s paired with one *LO16.
+//
+// *HI16 relocations and their matching *LO16 are:
+//
+// +---------------------------------------------+-------------------+
+// | *HI16 | matching *LO16 |
+// |---------------------------------------------+-------------------|
+// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 |
+// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 |
+// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 |
+// +---------------------------------------------+-------------------+
+//
+// (local R_*_GOT16 meaning R_*_GOT16 against the local symbol.)
+//
+// To handle *HI16 and *LO16 relocations, the linker needs a combined addend
+// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations:
+// AHL = (AHI << 16) + (short)ALO;
+//
+// We reuse the gnu as sorting algorithm, so we emit the relocation table
+// sorted the same way gnu as would sort it, for easier comparison of the
+// generated .o files.
+//
+// The logic is:
+// search the table (starting from the highest offset and going back to zero)
+// for all *HI16 relocations that don't have a matching *LO16.
+// For every such HI, find a matching LO with highest offset that isn't already
+// matched with another HI. If there are no free LOs, match it with the first
+// found (starting from lowest offset).
+// When there are more HIs matched with one LO, sort them in descending order by
+// offset.
+//
+// In other words, when searching for a matching LO:
+// - don't look for a 'better' match for the HIs that are already followed by a
+// matching LO;
+// - prefer LOs without a pair;
+// - prefer LOs with higher offset;
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ if (Relocs.size() < 2)
+ return;
+
+ // The default function sorts entries by Offset in descending order.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ // Init MipsRelocs from Relocs.
+ std::vector<MipsRelocationEntry> MipsRelocs;
+ for (unsigned I = 0, E = Relocs.size(); I != E; ++I)
+ MipsRelocs.push_back(MipsRelocationEntry(Relocs[I]));
+
+ // Find a matching LO for all HIs that need it.
+ for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) {
+ if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE ||
+ (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs)))
+ continue;
+
+ int32_t MatchedLoIndex = -1;
+
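A worked instance of the pairing rules, with assumed offsets: HI16 at 0x0, HI16 at 0x8, and LO16 at 0xC, all against the same local symbol. Both HIs match the LO at 0xC (it is free and has the highest offset), so after setMatch():

  HI16 @ 0x0 -> SortOffset 0xB
  HI16 @ 0x8 -> SortOffset 0xB
  LO16 @ 0xC -> SortOffset 0xC

After the descending sort below, the LO sits immediately next to both HIs, which tie-break among themselves by their real offsets.

+ // Search the list in the ascending order of Offset.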
+ for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) { + // check for a match + if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) && + (MatchedLoIndex == -1 || // first match + // or we already have a match, + // but this one is with higher offset and it's free + (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs)))) + MatchedLoIndex = J; + } + + if (MatchedLoIndex != -1) + // We have a match. + setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]); + } + + // SortOffsets are calculated, call the sorting function. + array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips); + + // Copy sorted MipsRelocs back to Relocs. + for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I) + Relocs[I] = MipsRelocs[I].R; +} + bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { @@ -264,7 +444,8 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 93f60df..6d1d9f4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, MCContext &Context = getContext(); const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); - MipsTargetELFStreamer *ELFTargetStreamer = - static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { const MCOperand &Op = Inst.getOperand(OpIndex); @@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); } + createPendingLabelRelocs(); +} + +void MipsELFStreamer::createPendingLabelRelocs() { + MipsTargetELFStreamer *ELFTargetStreamer = + static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); + + // FIXME: Also mark labels when in MIPS16 mode. if (ELFTargetStreamer->isMicroMipsEnabled()) { for (auto Label : Labels) { MCSymbolData &Data = getOrCreateSymbolData(Label); @@ -70,7 +76,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() { } MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new MipsELFStreamer(Context, MAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 6b834c6..4e30901 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -33,7 +33,7 @@ class MipsELFStreamer : public MCELFStreamer { public: - MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, MAB, OS, Emitter) { @@ -65,10 +65,13 @@ public: /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); + + /// Mark labels as microMIPS, if necessary for the subtarget. 
+ void createPendingLabelRelocs(); }; MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } // namespace llvm. #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index b01726d..cc40e2e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -43,7 +43,7 @@ public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} - ~MipsMCCodeEmitter() {} + ~MipsMCCodeEmitter() override {} void EmitByte(unsigned char C, raw_ostream &OS) const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index e6b5be7..687b800 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -23,9 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 6f3f37b..a75d27d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -97,17 +97,16 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMipsMCInstPrinter(const Target &T, +static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new MipsInstPrinter(MAI, MII, MRI); } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MCStreamer *S; if (!T.isOSNaCl()) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 92f394a..577a8b3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class StringRef; class Target; class raw_ostream; +class raw_pwrite_stream; extern Target TheMipsTarget; extern Target TheMipselTarget; @@ -53,7 +54,7 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit); namespace MIPS_MC { diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 1adfdf9..35348aa 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7; class MipsNaClELFStreamer : public MipsELFStreamer { public: - MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter 
*Emitter) + MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {} - ~MipsNaClELFStreamer() {} + ~MipsNaClELFStreamer() override {} private: // Whether we started the sandboxing sequence for calls. Calls are bundled @@ -252,7 +252,7 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { } MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 5790a5c..cfd56c6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -54,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {} void MipsTargetStreamer::emitDirectiveNaNLegacy() {} void MipsTargetStreamer::emitDirectiveOptionPic0() {} void MipsTargetStreamer::emitDirectiveOptionPic2() {} +void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) {} void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {} @@ -189,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() { OS << "\t.option\tpic2\n"; } +void MipsTargetAsmStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + OS << "\t.insn\n"; +} + void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) { OS << "\t.frame\t$" @@ -507,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, const MCSymbol &RhsSym = static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); - uint8_t Type = MCELF::GetType(Data); - if ((Type != ELF::STT_FUNC) || - !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) + + if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) return; MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); @@ -637,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() { MCA.setELFHeaderEFlags(Flags); } +void MipsTargetELFStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer); + MEF.createPendingLabelRelocs(); +} + void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg_) { MCContext &Context = getStreamer().getAssembler().getContext(); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index abecfa0..5828fbd 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -143,25 +143,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void Mips16FrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const Mips16InstrInfo &TII = - *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo()); - - TII.adjustStackPtr(Mips::SP, Amount, 
MBB, I); - } - - MBB.erase(I); -} - bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 012d558..0287e59 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -26,10 +26,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 00d4495..a49572e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted( void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + if (Amount == 0) + return; + if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> BuildAddiuSpImm(MBB, I, Amount); else diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index f9b7387..6540b40 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -77,7 +77,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. // This is to adjust some FrameReg. We return the new register to be used diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 1eb3b2c..9024f21 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -260,31 +260,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() { unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; - unsigned i, e = CSI.size(); - - // Set FPU Bitmask. - for (i = 0; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Mips::GPR32RegClass.contains(Reg)) - break; + for (const auto &I : CSI) { + unsigned Reg = I.getReg(); unsigned RegNum = TRI->getEncodingValue(Reg); - if (Mips::AFGR64RegClass.contains(Reg)) { + + // If it's a floating point register, set the FPU Bitmask. + // If it's a general purpose register, set the CPU Bitmask. + if (Mips::FGR32RegClass.contains(Reg)) { + FPUBitmask |= (1 << RegNum); + CSFPRegsSize += FGR32RegSize; + } else if (Mips::AFGR64RegClass.contains(Reg)) { FPUBitmask |= (3 << RegNum); CSFPRegsSize += AFGR64RegSize; HasAFGR64Reg = true; - continue; - } - - FPUBitmask |= (1 << RegNum); - CSFPRegsSize += FGR32RegSize; - } - - // Set CPU Bitmask. - for (; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - unsigned RegNum = TRI->getEncodingValue(Reg); - CPUBitmask |= (1 << RegNum); + } else if (Mips::GPR32RegClass.contains(Reg)) + CPUBitmask |= (1 << RegNum); } // FP Regs are saved right below where the virtual frame pointer points to. 
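The rewritten loop above folds the old two-phase scan (FPU registers, then CPU registers) into a single pass over CSI. A worked instance, assuming the usual MIPS encoding values ($s0 = 16, $s1 = 17, and the AFGR64 pair $d10 = 20, covering $f20/$f21):

  unsigned CPUBitmask = (1 << 16) | (1 << 17); // $s0, $s1         -> 0x00030000
  unsigned FPUBitmask = (3 << 20);             // $d10 ($f20,$f21) -> 0x00300000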
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 7de0081..e8e3d3d 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -440,7 +440,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { const GlobalValue *GV = dyn_cast<GlobalValue>(V); - if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic()) + if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) return false; if (!GV) return false; diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 8b8b019..826fbaf 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { return RoundUpToAlignment(Offset, getStackAlignment()); } + +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void MipsFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP; + + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 90a8d2a..96d1e29 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,6 +32,11 @@ public: bool hasFP(const MachineFunction &MF) const override; + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + protected: uint64_t estimateStackSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e4bae03..f37737d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3682,6 +3682,7 @@ void MipsTargetLowering::passByValArg( DAG.getIntPtrConstant(VA.getLocMemOffset())); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy), Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7b2b289..4589535 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -117,6 +117,10 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + virtual void adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const = 0; + /// Create an instruction which has the same operands and memory operands /// as MI but has a new opcode. 
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c937d2b..a1fad66 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -186,10 +186,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; -def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, - AssemblerPredicate<"FeatureMips32">; -def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, - AssemblerPredicate<"FeatureMips32">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; @@ -1596,8 +1594,12 @@ def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; } def : MipsInstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"bnezl $rs,$offset", + (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"beqz $rs,$offset", (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"beqzl $rs,$offset", + (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; def : MipsInstAlias<"break", (BREAK 0, 0), 1>; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 09e722d..0d1ee04 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -60,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const { #endif } -MipsFunctionInfo::~MipsFunctionInfo() { - for (StringMap<const MipsCallEntry *>::iterator - I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E; - ++I) - delete I->getValue(); - - for (const auto &Entry : GlobalCallEntries) - delete Entry.second; -} +MipsFunctionInfo::~MipsFunctionInfo() {} bool MipsFunctionInfo::globalBaseRegSet() const { return GlobalBaseReg; @@ -125,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const { } MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) { - const MipsCallEntry *&E = ExternalCallEntries[Name]; + std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name]; if (!E) - E = new MipsCallEntry(Name); + E = llvm::make_unique<MipsCallEntry>(Name); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) { - const MipsCallEntry *&E = GlobalCallEntries[Val]; + std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val]; if (!E) - E = new MipsCallEntry(Val); + E = llvm::make_unique<MipsCallEntry>(Val); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 217f307..32436ef 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -144,8 +144,9 @@ private: int MoveF64ViaSpillFI; /// MipsCallEntry maps. 
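/// (Editorial note, not part of the patch: holding the entries by
/// std::unique_ptr below is what lets the hand-written delete loops in
/// ~MipsFunctionInfo above collapse to an empty body; the maps now own
/// and free their MipsCallEntry values automatically.)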
- StringMap<const MipsCallEntry *> ExternalCallEntries; - ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries; + StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries; + ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>> + GlobalCallEntries; }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h index dc29cbd..746feab 100644 --- a/lib/Target/Mips/MipsOptionRecord.h +++ b/lib/Target/Mips/MipsOptionRecord.h @@ -52,7 +52,7 @@ public: COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID)); COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID)); } - ~MipsRegInfoRecord() {} + ~MipsRegInfoRecord() override {} void EmitMipsOptionRecord() override; void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 7c79c4c..23feb5c 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -607,26 +607,6 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { !MFI->hasVarSizedObjects(); } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void MipsSEFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const MipsSEInstrInfo &TII = - *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); - - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - TII.adjustStackPtr(SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 0eca1df..22448a4 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -27,10 +27,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index a598c3f..6daa632 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -948,7 +948,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, llvm_unreachable("Unexpected asm memory constraint"); // All memory constraints can at least accept raw pointers. case InlineAsm::Constraint_i: - case InlineAsm::Constraint_R: OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; @@ -961,6 +960,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; + case InlineAsm::Constraint_R: + // The 'R' constraint is supposed to be much more complicated than this. + // However, it's becoming less useful due to architectural changes and + // ought to be replaced by other constraints such as 'ZC'. 
+ // For now, support 9-bit signed offsets which is supportable by all + // subtargets for all instructions. + if (selectAddrRegImm9(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; case InlineAsm::Constraint_ZC: // ZC matches whatever the pref, ll, and sc instructions can handle for the // given subtarget. diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index b992579..cb38393 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + if (Amount == 0) + return; + if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index d16fab2..bebbabf 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -68,7 +68,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. If NewImm is a /// non-NULL parameter, the last instruction is not emitted, but instead diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 1ff041d..22b0c6c 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -45,6 +45,7 @@ public: virtual void emitDirectiveNaNLegacy(); virtual void emitDirectiveOptionPic0(); virtual void emitDirectiveOptionPic2(); + virtual void emitDirectiveInsn(); virtual void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg); virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); @@ -160,6 +161,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; @@ -227,6 +229,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index 80b2f62..ac92df9 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -28,13 +28,9 @@ using namespace llvm; #include "NVPTXGenAsmWriter.inc" - NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the virtual 
register @@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 0496964..02c5a21 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -25,10 +25,11 @@ class MCSubtargetInfo; class NVPTXInstPrinter : public MCInstPrinter { public: NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 2b4d864..f9e4324 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -58,14 +58,13 @@ static MCCodeGenInfo *createNVPTXMCCodeGenInfo( return X; } -static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T, +static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new NVPTXInstPrinter(MAI, MII, MRI, STI); + return new NVPTXInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 93fabf6..96abfa8 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", "Target SM 2.1">; def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", "Target SM 3.0">; +def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32", + "Target SM 3.2">; def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", "Target SM 3.5">; +def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37", + "Target SM 3.7">; def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50", "Target SM 5.0">; +def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52", + "Target SM 5.2">; +def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53", + "Target SM 5.3">; // PTX Versions -def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", - "Use PTX version 3.0">; -def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", - "Use PTX version 3.1">; def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", "Use PTX version 3.2">; def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40", "Use PTX version 4.0">; +def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41", + "Use PTX version 4.1">; +def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42", + "Use PTX version 4.2">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
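The feature knobs above only name the new SM and PTX levels; the Proc definitions in the next hunk pair each newly added SM target with a minimum PTX version. A rough C++ illustration of that pairing, with the values read off those definitions (the helper is hypothetical, not part of the patch or of LLVM):

    // Minimum PTX ISA version implied by each newly added SM target, per the
    // Proc definitions in the next hunk. Hypothetical illustration only.
    unsigned minPtxForSm(unsigned SmVersion) {
      switch (SmVersion) {
      case 32: return 40; // sm_32 -> PTX 4.0
      case 37: return 41; // sm_37 -> PTX 4.1
      case 50: return 40; // sm_50 -> PTX 4.0
      case 52: return 41; // sm_52 -> PTX 4.1
      case 53: return 42; // sm_53 -> PTX 4.2
      default: return 32; // older SM targets: assumed PTX 3.2 baseline
      }
    }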
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_32", [SM32, PTX40]>; def : Proc<"sm_35", [SM35]>; -def : Proc<"sm_50", [SM50]>; +def : Proc<"sm_37", [SM37, PTX41]>; +def : Proc<"sm_50", [SM50, PTX40]>; +def : Proc<"sm_52", [SM52, PTX41]>; +def : Proc<"sm_53", [SM53, PTX42]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cc58b07..9a71964 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -118,7 +118,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { DebugLoc curLoc = MI.getDebugLoc(); - if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) + if (!prevDebugLoc && !curLoc) return; if (prevDebugLoc == curLoc) @@ -126,39 +126,32 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { prevDebugLoc = curLoc; - if (curLoc.isUnknown()) + if (!curLoc) return; - const MachineFunction *MF = MI.getParent()->getParent(); - //const TargetMachine &TM = MF->getTarget(); - - const LLVMContext &ctx = MF->getFunction()->getContext(); - DIScope Scope(curLoc.getScope(ctx)); - - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); + auto *Scope = cast_or_null<MDScope>(curLoc.getScope()); if (!Scope) return; - StringRef fileName(Scope.getFilename()); - StringRef dirName(Scope.getDirectory()); + StringRef fileName(Scope->getFilename()); + StringRef dirName(Scope->getDirectory()); SmallString<128> FullPathName = dirName; if (!dirName.empty() && !sys::path::is_absolute(fileName)) { sys::path::append(FullPathName, fileName); - fileName = FullPathName.str(); + fileName = FullPathName; } - if (filenameMap.find(fileName.str()) == filenameMap.end()) + if (filenameMap.find(fileName) == filenameMap.end()) return; // Emit the line from the source file. 
if (InterleaveSrc) - this->emitSrcInText(fileName.str(), curLoc.getLine()); + this->emitSrcInText(fileName, curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() << " " << curLoc.getCol(); - OutStreamer.EmitRawText(Twine(temp.str().c_str())); + OutStreamer.EmitRawText(temp.str()); } void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -641,7 +634,7 @@ static bool usedInGlobalVarDef(const Constant *C) { return false; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->getName().str() == "llvm.used") + if (GV->getName() == "llvm.used") return false; return true; } @@ -656,7 +649,7 @@ static bool usedInGlobalVarDef(const Constant *C) { static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { - if (othergv->getName().str() == "llvm.used") + if (othergv->getName() == "llvm.used") return true; } @@ -780,32 +773,32 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DbgFinder.processModule(M); unsigned i = 1; - for (DICompileUnit DIUnit : DbgFinder.compile_units()) { - StringRef Filename(DIUnit.getFilename()); - StringRef Dirname(DIUnit.getDirectory()); + for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) { + StringRef Filename = DIUnit->getFilename(); + StringRef Dirname = DIUnit->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; - OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); + filenameMap[Filename] = i; + OutStreamer.EmitDwarfFileDirective(i, "", Filename); ++i; } - for (DISubprogram SP : DbgFinder.subprograms()) { - StringRef Filename(SP.getFilename()); - StringRef Dirname(SP.getDirectory()); + for (MDSubprogram *SP : DbgFinder.subprograms()) { + StringRef Filename = SP->getFilename(); + StringRef Dirname = SP->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; + filenameMap[Filename] = i; ++i; } } @@ -1011,7 +1004,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, msg.append("Error: "); msg.append("Symbol "); if (V->hasName()) - msg.append(V->getName().str()); + msg.append(V->getName()); msg.append("has unsupported appending linkage type"); llvm_unreachable(msg.c_str()); } else if (!V->hasInternalLinkage() && @@ -1147,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, const Function *demotedFunc = nullptr; if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { - O << "// " << GVar->getName().str() << " has been demoted\n"; + O << "// " << GVar->getName() << " has been demoted\n"; if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { @@ -1195,9 +1188,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds 
zero-initializer to variables that don't have an // initial value, so skip warning for this case. if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = "initial value of '" + GVar->getName().str() + - "' is not allowed in addrspace(" + - llvm::utostr_32(PTy->getAddressSpace()) + ")"; + std::string warnMsg = + ("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); report_fatal_error(warnMsg.c_str()); } } @@ -1771,12 +1765,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { - unsigned char c = - (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { - short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { @@ -2086,7 +2079,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader *reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2094,7 +2087,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { temp << " "; temp << reader->readLine(line); temp << "\n"; - this->OutStreamer.EmitRawText(Twine(temp.str())); + this->OutStreamer.EmitRawText(temp.str()); } LineReader *NVPTXAsmPrinter::getReader(std::string filename) { diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 6d7c99c..ae63cae 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -132,9 +132,8 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( } else { // GEP is a constant expression. 
Constant *NewGEPCE = ConstantExpr::getGetElementPtr( - cast<Constant>(Cast->getOperand(0)), - Indices, - GEP->isInBounds()); + GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), + Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 850c020..6fd09c4 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -347,6 +347,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)); case Instruction::Select: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ff74e6e..8b06657 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3893,7 +3893,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, const SDNode *left = N0.getOperand(0).getNode(); const SDNode *right = N0.getOperand(1).getNode(); - if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) + if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) opIsLive = true; if (!opIsLive) diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 578401a..6ab0fad 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -70,8 +70,8 @@ static void convertTransferToLoop( // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. 
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -84,9 +84,11 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind - Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile); + Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), + srcVolatile); // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile); + loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), + dstVolatile); // The value for ind coming from backedge is (ind + 1) Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); @@ -106,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored dstAddr = @@ -116,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, PHINode *ind = loop.CreatePHI(len->getType(), 0); ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false); + loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); ind->addIncoming(newind, loopBB); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index b8df5af..2cd10e8 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -52,7 +52,7 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; // Emission of machine code through MCJIT is not supported. - bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, + bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &, bool = true) override { return true; } diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b8af04d..dc81802 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXTargetTransformInfo.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +20,75 @@ using namespace llvm; #define DEBUG_TYPE "NVPTXtti" +// Whether the given intrinsic reads threadIdx.x/y/z. 
+static bool readsThreadIndex(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+      return true;
+  }
+}
+
+static bool readsLaneId(const IntrinsicInst *II) {
+  return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
+
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::nvvm_atomic_load_add_f32:
+    case Intrinsic::nvvm_atomic_load_inc_32:
+    case Intrinsic::nvvm_atomic_load_dec_32:
+      return true;
+  }
+}
+
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+  // Without inter-procedural analysis, we conservatively assume that arguments
+  // to __device__ functions are divergent.
+  if (const Argument *Arg = dyn_cast<Argument>(V))
+    return !isKernelFunction(*Arg->getParent());
+
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Without pointer analysis, we conservatively assume values loaded from
+    // generic or local address space are divergent.
+    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      unsigned AS = LI->getPointerAddressSpace();
+      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+    }
+    // Atomic instructions may cause divergence. Atomic instructions are
+    // executed sequentially across all threads in a warp. Therefore, an earlier
+    // executed thread may see different memory inputs than a later executed
+    // thread. For example, suppose *a = 0 initially.
+    //
+    //   atom.global.add.s32 d, [a], 1
+    //
+    // returns 0 for the first thread that enters the critical region, and 1 for
+    // the second thread.
+    if (I->isAtomic())
+      return true;
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      // Instructions that read threadIdx are obviously divergent.
+      if (readsThreadIndex(II) || readsLaneId(II))
+        return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+      // atomic IR instruction.
+      if (isNVVMAtomic(II))
+        return true;
+    }
+    // Conservatively consider the return value of function calls as divergent.
+    // We could analyze callees with bodies more precisely using
+    // inter-procedural analysis.
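    // Editorial note, not part of the patch: even a call whose arguments are
    // all warp-uniform can return a divergent value (e.g. a callee that reads
    // threadIdx internally), so without inter-procedural analysis the only
    // safe answer for a call result is "divergent", which is what the check
    // below gives.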
+ if (isa<CallInst>(I)) + return true; + } + + return false; +} + unsigned NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index bf21e88..4280888 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -61,6 +61,8 @@ public: bool hasBranchDivergence() { return true; } + bool isSourceOfDivergence(const Value *V); + unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 99a1633..90ab7a5 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::RLWINMbm: + case PPC::RLWINMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWIMIbm: + case PPC::RLWIMIobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); // The tied operand. + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWNMbm: + case PPC::RLWNMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? 
PPC::RLWNM : PPC::RLWNMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } } } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index a9f5fc7..5cbf3d9 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler { public: PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~PPCDisassembler() {} + ~PPCDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 311a4f2..1576544 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8718743..eca37eb 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,8 @@ public: } void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index bea88a2..420c5c8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -208,7 +208,7 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -224,8 +224,7 @@ namespace { ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index b817394..3e3489f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, bool IsLittleEndian, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b9f0afb..725b47b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -44,7 +44,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - ~PPCMCCodeEmitter() {} + ~PPCMCCodeEmitter() override {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2f7a768..423e427 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new PPCTargetMachOStreamer(S); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, +static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin(); - return new PPCInstPrinter(MAI, MII, MRI, isDarwin); + const MCRegisterInfo &MRI) { + return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin()); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 8b1e3b4..5f2117c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -18,6 +18,7 @@ #undef PPC #include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" namespace llvm { class MCAsmBackend; @@ -29,6 +30,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class 
raw_pwrite_stream;
 class raw_ostream;
 
 extern Target ThePPC32Target;
@@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
 MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                   StringRef TT, StringRef CPU);
 
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
-                                         bool Is64Bit,
-                                         bool IsLittleEndian,
-                                         uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+                                         bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
                                           uint32_t CPUType,
                                           uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+  if (!Val)
+    return false;
+
+  if (isShiftedMask_32(Val)) {
+    // look for the first non-zero bit
+    MB = countLeadingZeros(Val);
+    // look for the first zero bit after the run of ones
+    ME = countLeadingZeros((Val - 1) ^ Val);
+    return true;
+  } else {
+    Val = ~Val; // invert mask
+    if (isShiftedMask_32(Val)) {
+      // effectively look for the first zero bit
+      ME = countLeadingZeros(Val) - 1;
+      // effectively look for the first one bit after the run of zeros
+      MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+      return true;
+    }
+  }
+  // no run present
+  return false;
+}
+
 } // End llvm namespace
 
 // Generated files will use "namespace PPC".
To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9..44e69b7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter( new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f175f6d..1a02bcc 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; +def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", + "Enable the bpermd instruction">; +def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", + "Enable extended divide instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", @@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; +def FeatureDirectMove : + SubtargetFeature<"direct-move", "HasDirectMove", "true", + "Enable Power8 direct move instructions", + [FeatureVSX]>; def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", "HasPartwordAtomics", "true", "Enable l[bh]arx and st[bh]cx.">; @@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; +/* Since new processors generally contain a superset of features of those that + came before them, the idea is to make implementations of new processors + less error prone and easier to read. + Namely: + list<SubtargetFeature> Power8FeatureList = ... + list<SubtargetFeature> FutureProcessorSpecificFeatureList = + [ features that Power8 does not support ] + list<SubtargetFeature> FutureProcessorFeatureList = + !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + + Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as + well as providing a single point of definition if the feature set will be + used elsewhere. 
+*/ +def ProcessorFeatures { + list<SubtargetFeature> Power7FeatureList = + [DirectivePwr7, FeatureAltivec, FeatureVSX, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + FeatureBPERMD, FeatureExtDiv, + DeprecatedMFTB, DeprecatedDST]; + list<SubtargetFeature> Power8SpecificFeatures = + [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + list<SubtargetFeature> Power8FeatureList = + !listconcat(Power7FeatureList, Power8SpecificFeatures); +} + // Note: Future features to add when support is extended to more // recent ISA levels: // @@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE]>; -/* Since new processors generally contain a superset of features of those that - came before them, the idea is to make implementations of new processors - less error prone and easier to read. - Namely: - list<SubtargetFeature> Power8FeatureList = ... - list<SubtargetFeature> FutureProcessorSpecificFeatureList = - [ features that Power8 does not support ] - list<SubtargetFeature> FutureProcessorFeatureList = - !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) - - Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as - well as providing a single point of definition if the feature set will be - used elsewhere. - -*/ -def ProcessorFeatures { - list<SubtargetFeature> Power8FeatureList = - [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, - FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, - FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, - Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; -} - def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, @@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, - DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index cd60906..383a1e2 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } - MachineModuleInfoELF &MMIELF = - 
MMI->getObjFileInfo<MachineModuleInfoELF>(); - - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - return AsmPrinter::doFinalization(M); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fbd7b6d..002616b 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } // Attempt to fast-select an integer-to-floating-point conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); @@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, } // Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); @@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; // FIXME: No multi-register return values yet. if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3ac8e94..4f8d01b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -105,13 +105,6 @@ namespace { return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } - /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s - /// with any number of 0s on either side. The 1s are allowed to wrap from - /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. - /// 0x0F0F0000 is not, since all 1s are not contiguous. - static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); - - /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. 
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, @@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { getSmallIPtrImm(Offset)); } -bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { - if (!Val) - return false; - - if (isShiftedMask_32(Val)) { - // look for the first non-zero bit - MB = countLeadingZeros(Val); - // look for the first zero bit after the run of ones - ME = countLeadingZeros((Val - 1) ^ Val); - return true; - } else { - Val = ~Val; // invert mask - if (isShiftedMask_32(Val)) { - // effectively look for the first zero bit - ME = countLeadingZeros(Val) - 1; - // effectively look for the first one bit after the run of zeros - MB = countLeadingZeros((Val - 1) ^ Val) + 1; - return true; - } - } - // no run present - return false; -} - bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 871531e..4c0b6a6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; + case PPCISD::MFVSR: return "PPCISD::MFVSR"; + case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; + case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. -bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, + DAG.getConstant(12, MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.MPI = MPI; } +/// \brief Custom lowers floating point to integer conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. 
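// Editorial sketch, not part of the patch (instruction pairing assumed from
// the ISA, not stated in the diff): without direct moves, an f64->i32
// conversion round-trips through memory, roughly
//   fctiwz f0, f1 ; stfiwx f0, 0, r4 ; lwz r3, 0(r4)
// whereas with the ISA 2.07 direct moves the store/load pair collapses to a
// register-to-register move, roughly
//   fctiwz f0, f1 ; mfvsrwz r3, vs0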
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode( + Op.getOpcode() == ISD::FP_TO_SINT + ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); + break; + case MVT::i64: + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); + break; + } + return Tmp; +} + SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { + if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) + return LowerFP_TO_INTDirectMove(Op, DAG, dl); + ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); @@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Custom lowers integer to floating point conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert((Op.getValueType() == MVT::f32 || + Op.getValueType() == MVT::f64) && + "Invalid floating point type as target of conversion"); + assert(Subtarget.hasFPCVT() && + "Int to FP conversions with direct moves require FPCVT"); + SDValue FP; + SDValue Src = Op.getOperand(0); + bool SinglePrec = Op.getValueType() == MVT::f32; + bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; + unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : + (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); + + if (WordInt) { + FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, + dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + else { + FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + + return FP; +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getConstantFP(1.0, Op.getValueType()), DAG.getConstantFP(0.0, Op.getValueType())); + // If we have direct moves, we can do all the conversion, skip the store/load + // however, without FPCVT we can't do most conversions. + if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + return LowerINT_TO_FPDirectMove(Op, DAG, dl); + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. - - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; @@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); - // When expanding a memset, require at least two QPX instructions to cover - // the cost of loading the value to be stored from the constant pool. - if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && - (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { - return MVT::v4f64; - } - - // We should use Altivec/VSX loads and stores when available. For unaligned - // addresses, unaligned VSX loads are only fast starting with the P8. - if (Subtarget.hasAltivec() && Size >= 16 && - (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || - ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) - return MVT::v4i32; + if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. 
+ if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + } if (Subtarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8afd7ef..7e2ebd4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -119,6 +119,15 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + /// Direct move from a VSX register to a GPR + MFVSR, + + /// Direct move from a GPR to a VSX register (algebraic) + MTVSRA, + + /// Direct move from a GPR to a VSX register (zero) + MTVSRZ, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to @@ -368,10 +377,6 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - /// isAllNegativeZeroVector - Returns true if all elements of build_vector - /// are -0.0. - bool isAllNegativeZeroVector(SDNode *N); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); @@ -649,6 +654,10 @@ namespace llvm { void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, SDLoc dl) const; + SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; + SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 183d088..d1d67cb 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; +def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "bpermd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>, + isPPC64, Requires<[HasBPERMD]>; let isCodeGenOnly = 1, isCommutable = 1 in def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), @@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; -defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64; +defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, 
isPPC64; +def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "mulld", "$rT, $rA, $rB", IIC_IntMulHD, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b7a7a1f..43c2158 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5eff156..8aecb65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; +def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; +def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, } } +// Multiclass for instructions for which the non-record form is not cracked +// and the record form is cracked (e.g. divw, mullw, etc.) +multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel, PPC970_DGroup_First, + PPC970_DGroup_Cracked; + } +} + multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; -defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; +defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; +def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0) def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; + // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. 
let PPC970_Unit = 7 in { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ec04da4..a98e58f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), when the elements are larger than i32. */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let isCommutable = 1 in { @@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; } // AddedComplexity = 500 } // HasP8Vector + +let Predicates = [HasDirectMove, HasVSX] in { +// VSX direct move instructions +def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; +def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; +def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; +def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; +def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove, HasVSX diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index 005bcaf..2947c66 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "ppc-loop-data-prefetch" #include "PPC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" @@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4ef..b6e7799 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include 
"llvm/ADT/Statistic.h" @@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } @@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; @@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa<SCEVAddRecExpr>(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet<BasicBlock *, 16> BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? 
MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 0965cb3..6df89fe 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); // If the target flags on the operand changes the name of the symbol, do that diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ed88803..f313b0a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include <cstdlib> @@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() { HasFPCVT = false; HasISEL = false; HasPOPCNTD = false; + HasBPERMD = false; + HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; @@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() { HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; + HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; } @@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else CPUName = "generic"; } -#if (defined(__APPLE__) || defined(__linux__)) && \ - (defined(__ppc__) || defined(__powerpc__)) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b4c1bb1..8d95508 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,6 +101,8 @@ protected: bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; + bool HasBPERMD; + bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; @@ -115,6 +117,7 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; + bool HasDirectMove; bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack @@ -225,6 +228,8 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasBPERMD() const { return HasBPERMD; } + bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } @@ -239,6 +244,7 @@ public: return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } + bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 6493713..8aaf5e1 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: PPCTargetStreamer(MCStreamer &S); - virtual ~PPCTargetStreamer(); + ~PPCTargetStreamer() override; virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index dfe988f..01233ae 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -622,6 +622,25 @@ void foo() { __asm__("" ::: "cr2"); } +//===-------------------------------------------------------------------------=== +The naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) +OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need to denote reserved fields in names of instruction formats. + //===----------------------------------------------------------------------===// Instruction fusion was introduced in ISA 2.06 and more opportunities added in diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 43d87d3..1d5b092 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -277,7 +277,7 @@ This will generate the following instruction sequence: This will almost certainly cause a load-hit-store hazard. Since val is a value parameter, it should not need to be saved onto the stack, unless it's being done to set up the vector register. 
Instead, -it would be better to splat teh value into a vector register, and then +it would be better to splat the value into a vector register, and then remove the (dead) stores to the stack. //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index e5d5ce2..2eb805e 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< !cast<string>(Value), "The size of local memory in bytes">; +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "GCN or newer GPU">; + +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", + "GCN1Encoding", + "true", + "Encoding format for SI and CI">; + +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", + "GCN3Encoding", + "true", + "Encoding format for VI">; class SubtargetFeatureGeneration <string Value, list<SubtargetFeature> Implies> : SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value, @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64]>; + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, + FeatureGCN1Encoding]>; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, + FeatureGCN3Encoding]>; //===----------------------------------------------------------------------===// @@ -197,8 +213,10 @@ def NullALU : InstrItinClass; class PredicateControl { Predicate SubtargetPredicate; + list<Predicate> AssemblerPredicates = []; list<Predicate> OtherPredicates = []; list<Predicate> Predicates = !listconcat([SubtargetPredicate], + AssemblerPredicates, OtherPredicates); } diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index d911014..b3480b4 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "InstPrinter/AMDGPUInstPrinter.h" #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" @@ -574,3 +575,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header))); } + +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. 
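The PrintAsmOperand override that begins above follows AsmPrinter's usual contract: return true when an inline-asm operand modifier cannot be handled, and false once the operand text has been emitted. Reduced to a standalone sketch (printReg is a hypothetical callback standing in for the real instruction printer):

// 'ExtraCode' is the optional single-letter modifier from the inline-asm
// template; returning true tells the caller the modifier was not handled.
bool printAsmOperandSketch(unsigned Reg, const char *ExtraCode,
                           void (*printReg)(unsigned)) {
  if (ExtraCode && ExtraCode[0]) {
    if (ExtraCode[1] != '\0')
      return true;           // multi-letter modifiers are unsupported
    switch (ExtraCode[0]) {
    default:
      return true;           // unknown; the real code defers to AsmPrinter
    case 'r':                // 'r' explicitly requests the register name
      break;
    }
  }
  printReg(Reg);             // default: print the register operand
  return false;
}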
+ + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + case 'r': + break; + } + } + + AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O, + *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); + return false; +} diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index 58ffb1e..1acff3a 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -99,6 +99,10 @@ public: void EmitEndOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + protected: std::vector<std::string> DisasmLines, HexLines; size_t DisasmLineMaxLen; diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 7341cd9..def252a 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -345,7 +345,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned NOps = N->getNumOperands(); for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? - if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { + if (isa<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 62a33fa..7c5235d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -126,6 +126,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Custom); setOperationAction(ISD::FROUND, MVT::f64, Custom); @@ -1685,14 +1687,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, const unsigned bitPos = halfBitWidth - i - 1; SDValue POS = DAG.getConstant(bitPos, HalfVT); // Get value of high bit - // TODO: Remove the BFE part when the optimization is fixed - SDValue HBit; - if (halfBitWidth == 32 && Subtarget->hasBFE()) { - HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); - } else { - HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); - } + SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); // Shift diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 4d08201..eeb7f3f 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -358,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; @@ -389,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr), def mskor_flat : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == 
AMDGPUAS::FLAT_ADDRESS; }]>; class global_binary_atomic_op<SDNode atomic_op> : PatFrag< diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index f047ed0..7e274a9 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -124,7 +124,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo()); - InstPrinter.printInst(&TmpInst, DisasmStream, StringRef()); + InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), + MF->getSubtarget()); // Disassemble instruction/operands to hex representation. SmallVector<MCFixup, 4> Fixups; diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp index 175dcd8..6d5f94e 100644 --- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -366,8 +366,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { Function *F = Call->getCalledFunction(); FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, F->isVarArg()); - Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, - F->getAttributes()); + Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(), + NewType, F->getAttributes()); Function *NewF = cast<Function>(C); Call->setCalledFunction(NewF); continue; diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 0ead652..259224a 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), + IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 403a3e4..aeb0817 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -71,6 +71,9 @@ private: int LocalMemorySize; bool EnableVGPRSpilling; bool SGPRInitBug; + bool IsGCN; + bool GCN1Encoding; + bool GCN3Encoding; AMDGPUFrameLowering FrameLowering; std::unique_ptr<AMDGPUTargetLowering> TLInfo; diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index ee6551b..c9b25a1 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -623,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); ++It) { MachineInstr *instr = &(*It); - if (!instr->getDebugLoc().isUnknown()) + if (instr->getDebugLoc()) DL = instr->getDebugLoc(); } return DL; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 49f0f23..aaf9b32 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" 
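The dyn_cast-to-cast cleanups in the PatFrags above follow the standard rule for LLVM-style RTTI: isa<T> only tests, cast<T> asserts and never returns null, and dyn_cast<T> returns null on mismatch, so dereferencing dyn_cast's result unconditionally (as the old predicates did) silently relies on the cast always succeeding. A self-contained illustration using the classof machinery from llvm/Support/Casting.h:

#include "llvm/Support/Casting.h"

struct Node {
  enum NodeKind { NK_Mem, NK_Other };
  NodeKind Kind;
  Node(NodeKind K) : Kind(K) {}
};

struct MemNode : Node {
  MemNode() : Node(NK_Mem) {}
  unsigned getAddressSpace() const { return 0; }
  static bool classof(const Node *N) { return N->Kind == NK_Mem; }
};

unsigned addrSpaceOf(const Node *N) {
  // cast<> is the right tool once the type is already known to be correct:
  if (llvm::isa<MemNode>(N))
    return llvm::cast<MemNode>(N)->getAddressSpace();
  // dyn_cast<> is for genuinely conditional casts; it yields null here.
  const MemNode *M = llvm::dyn_cast<MemNode>(N);
  return M ? M->getAddressSpace() : ~0u;
}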
#include "llvm/ADT/STLExtras.h" @@ -27,76 +29,105 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace { -class AMDGPUAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; - MCAsmParser &Parser; - - - /// @name Auto-generated Match Functions - /// { - -#define GET_ASSEMBLER_HEADER -#include "AMDGPUGenAsmMatcher.inc" - - /// } - -public: - AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, - const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI), Parser(Parser) { - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) override; - bool ParseDirective(AsmToken DirectiveID) override; - OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands) override; - - bool parseCnt(int64_t &IntVal); - OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); -}; +struct OptionalOperand; class AMDGPUOperand : public MCParsedAsmOperand { enum KindTy { Token, - Immediate + Immediate, + Register, + Expression } Kind; + SMLoc StartLoc, EndLoc; + public: AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} + MCContext *Ctx; + + enum ImmTy { + ImmTyNone, + ImmTyDSOffset0, + ImmTyDSOffset1, + ImmTyGDS, + ImmTyOffset, + ImmTyGLC, + ImmTySLC, + ImmTyTFE, + ImmTyClamp, + ImmTyOMod + }; + struct TokOp { const char *Data; unsigned Length; }; struct ImmOp { + bool IsFPImm; + ImmTy Type; int64_t Val; }; + struct RegOp { + unsigned RegNo; + int Modifiers; + const MCRegisterInfo *TRI; + }; + union { TokOp Tok; ImmOp Imm; + RegOp Reg; + const MCExpr *Expr; }; void addImmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateImm(getImm())); } - void addRegOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("addRegOperands"); - } + StringRef getToken() const { return StringRef(Tok.Data, Tok.Length); } + + void addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addRegOrImmOperands(MCInst &Inst, unsigned N) const { + if (isReg()) + addRegOperands(Inst, N); + else + addImmOperands(Inst, N); + } + + void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers)); + addRegOperands(Inst, N); + } + + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { + if (isImm()) + addImmOperands(Inst, N); + else { + assert(isExpr()); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + } + + bool defaultTokenHasSuffix() const { + StringRef Token(Tok.Data, Tok.Length); + + return Token.endswith("_e32") || Token.endswith("_e64"); + } + bool isToken() const override { return Kind == Token; } @@ -105,52 +136,369 @@ public: return Kind == Immediate; } + bool isInlineImm() const { + float F = BitsToFloat(Imm.Val); + // TODO: Add 0.5pi for VI + return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || + (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); + } + + bool isDSOffset0() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset0; + } + + bool 
isDSOffset1() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset1; + } + int64_t getImm() const { return Imm.Val; } + enum ImmTy getImmTy() const { + assert(isImm()); + return Imm.Type; + } + bool isReg() const override { - return false; + return Kind == Register && Reg.Modifiers == -1; + } + + bool isRegWithInputMods() const { + return Kind == Register && Reg.Modifiers != -1; + } + + void setModifiers(unsigned Mods) { + assert(isReg()); + Reg.Modifiers = Mods; } unsigned getReg() const override { - return 0; + return Reg.RegNo; + } + + bool isRegOrImm() const { + return isReg() || isImm(); + } + + bool isRegClass(unsigned RCID) const { + return Reg.TRI->getRegClass(RCID).contains(getReg()); + } + + bool isSCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc64() const { + return isImm() || isInlineImm() || + (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); + } + + bool isVCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVCSrc64() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + } + + bool isVSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVSrc64() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); } bool isMem() const override { return false; } + bool isExpr() const { + return Kind == Expression; + } + + bool isSoppBrTarget() const { + return isExpr() || isImm(); + } + SMLoc getStartLoc() const override { - return SMLoc(); + return StartLoc; } SMLoc getEndLoc() const override { - return SMLoc(); + return EndLoc; } void print(raw_ostream &OS) const override { } - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) { + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc, + enum ImmTy Type = ImmTyNone, + bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); Op->Imm.Val = Val; + Op->Imm.IsFPImm = IsFPImm; + Op->Imm.Type = Type; + Op->StartLoc = Loc; + Op->EndLoc = Loc; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) { + static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { auto Res = llvm::make_unique<AMDGPUOperand>(Token); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); + Res->StartLoc = Loc; + Res->EndLoc = Loc; return Res; } + static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S, + SMLoc E, + const MCRegisterInfo *TRI) { + auto Op = llvm::make_unique<AMDGPUOperand>(Register); + Op->Reg.RegNo = RegNo; + Op->Reg.TRI = TRI; + Op->Reg.Modifiers = -1; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) { + auto Op = llvm::make_unique<AMDGPUOperand>(Expression); + Op->Expr = Expr; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + bool isDSOffset() const; + bool isDSOffset01() const; bool isSWaitCnt() const; + bool isMubufOffset() const; }; +class AMDGPUAsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + const MCInstrInfo &MII; + MCAsmParser &Parser; + + unsigned ForcedEncodingSize; + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "AMDGPUGenAsmMatcher.inc" + + /// } + +public: + 
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser), + ForcedEncodingSize(0){ + + if (!STI.getFeatureBits()) { + // Set default features. + STI.ToggleFeature("SOUTHERN_ISLANDS"); + } + + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + unsigned getForcedEncodingSize() const { + return ForcedEncodingSize; + } + + void setForcedEncodingSize(unsigned Size) { + ForcedEncodingSize = Size; + } + + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseDirective(AsmToken DirectiveID) override; + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default = 0); + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, + OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseOptionalOps( + const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands); + + + void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands); + + bool parseCnt(int64_t &IntVal); + OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + + void cvtMubuf(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseOffset(OperandVector &Operands); + OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseGLC(OperandVector &Operands); + OperandMatchResultTy parseSLC(OperandVector &Operands); + OperandMatchResultTy parseTFE(OperandVector &Operands); + + OperandMatchResultTy parseDMask(OperandVector &Operands); + OperandMatchResultTy parseUNorm(OperandVector &Operands); + OperandMatchResultTy parseR128(OperandVector &Operands); + + void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); +}; + +struct OptionalOperand { + const char *Name; + AMDGPUOperand::ImmTy Type; + bool IsBit; + int64_t Default; + bool (*ConvertResult)(int64_t&); +}; + +} + +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) { + if (IsVgpr) { + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::VGPR_32RegClassID; + case 2: return AMDGPU::VReg_64RegClassID; + case 3: return AMDGPU::VReg_96RegClassID; + case 4: return AMDGPU::VReg_128RegClassID; + case 8: return AMDGPU::VReg_256RegClassID; + case 16: return AMDGPU::VReg_512RegClassID; + } + } + + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::SGPR_32RegClassID; + case 2: 
return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SReg_128RegClassID; + case 8: return AMDGPU::SReg_256RegClassID; + case 16: return AMDGPU::SReg_512RegClassID; + } +} + +static unsigned getRegForName(const StringRef &RegName) { + + return StringSwitch<unsigned>(RegName) + .Case("exec", AMDGPU::EXEC) + .Case("vcc", AMDGPU::VCC) + .Case("flat_scr", AMDGPU::FLAT_SCR) + .Case("m0", AMDGPU::M0) + .Case("scc", AMDGPU::SCC) + .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO) + .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI) + .Case("vcc_lo", AMDGPU::VCC_LO) + .Case("vcc_hi", AMDGPU::VCC_HI) + .Case("exec_lo", AMDGPU::EXEC_LO) + .Case("exec_hi", AMDGPU::EXEC_HI) + .Default(0); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - return true; + const AsmToken Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + const StringRef &RegName = Tok.getString(); + RegNo = getRegForName(RegName); + + if (RegNo) { + Parser.Lex(); + return false; + } + + // Match vgprs and sgprs + if (RegName[0] != 's' && RegName[0] != 'v') + return true; + + bool IsVgpr = RegName[0] == 'v'; + unsigned RegWidth; + unsigned RegIndexInClass; + if (RegName.size() > 1) { + // We have a 32-bit register + RegWidth = 1; + if (RegName.substr(1).getAsInteger(10, RegIndexInClass)) + return true; + Parser.Lex(); + } else { + // We have a register greater than 32-bits. + + int64_t RegLo, RegHi; + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegLo)) + return true; + + if (getLexer().isNot(AsmToken::Colon)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegHi)) + return true; + + if (getLexer().isNot(AsmToken::RBrac)) + return true; + + Parser.Lex(); + RegWidth = (RegHi - RegLo) + 1; + if (IsVgpr) { + // VGPR registers aren't aligned. + RegIndexInClass = RegLo; + } else { + // SGPR registers are aligned. Max alignment is 4 dwords. 
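The arithmetic around this comment is the one subtle step of register parsing: a range like s[lo:hi] or v[lo:hi] has width hi - lo + 1, a VGPR tuple starts at its low register directly, and SGPR tuples are allocated aligned, so the class-relative index divides the low register by the tuple alignment (capped at 4 dwords). The same mapping as a standalone sketch, with RegRef as an invented result type:

#include <algorithm>

struct RegRef {
  bool IsVgpr;
  unsigned Width;         // registers in the tuple: 1, 2, 4, 8, or 16
  unsigned IndexInClass;  // index within the matching register class
};

// Maps v[Lo:Hi] / s[Lo:Hi] to a class-relative index, mirroring the parser
// above: VGPR tuples are unaligned, SGPR tuples align to at most 4 dwords.
RegRef decodeRegRange(bool IsVgpr, unsigned Lo, unsigned Hi) {
  unsigned Width = Hi - Lo + 1;
  unsigned Index = IsVgpr ? Lo : Lo / std::min(Width, 4u);
  return {IsVgpr, Width, Index};
}
// Example: s[4:7] -> width 4, index 1, while v[4:7] -> width 4, index 4.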
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u); + } + } + + const MCRegisterInfo *TRC = getContext().getRegisterInfo(); + unsigned RC = getRegClass(IsVgpr, RegWidth); + if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs()) + return true; + RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass); + return false; +} + +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + + if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) + return Match_InvalidOperand; + + return Match_Success; } @@ -162,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { - case Match_Success: - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - case Match_MissingFeature: - return Error(IDLoc, "instruction use requires an option to be enabled"); - case Match_MnemonicFail: - return Error(IDLoc, "unrecognized instruction mnemonic"); - case Match_InvalidOperand: { - if (ErrorInfo != ~0ULL) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + default: break; + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, STI); + return false; + case Match_MissingFeature: + return Error(IDLoc, "missing feature"); + + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) { + return Error(IDLoc, "too few operands for instruction"); + } + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); } - return Error(IDLoc, "invalid operand for instruction"); - } } llvm_unreachable("Implement any new match types added!"); } @@ -186,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } +static bool operandsHaveModifiers(const OperandVector &Operands) { + + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isRegWithInputMods()) + return true; + if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || + Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) + return true; + } + return false; +} + AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { @@ -194,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // If we successfully parsed the operand or if there was an error parsing, // we are done. - if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) + // + // If we are parsing after we reach EndOfStatement then this means we + // are appending default values to the Operands list. This is only done + // by a custom parser, so we shouldn't continue on to the generic parsing. 
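checkTargetMatchPredicate, defined a little earlier, is what makes the _e32/_e64 suffixes binding: after the generic matcher has picked an opcode, the match is rejected when the requested encoding width disagrees with whether the opcode is a VOP3 encoding. The predicate reduces to a few lines; here the SIInstrFlags::VOP3 test is modeled by a plain boolean:

// ForcedSize is 0 (no suffix), 32 (_e32) or 64 (_e64). An _e64 suffix
// demands a VOP3 encoding; an _e32 suffix forbids one.
bool encodingSizeMatches(unsigned ForcedSize, bool IsVOP3) {
  if (ForcedSize == 32 && IsVOP3)
    return false;
  if (ForcedSize == 64 && !IsVOP3)
    return false;
  return true;  // no suffix, or suffix and encoding agree
}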
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + getLexer().is(AsmToken::EndOfStatement)) return ResTy; + bool Negate = false, Abs = false; + if (getLexer().getKind()== AsmToken::Minus) { + Parser.Lex(); + Negate = true; + } + + if (getLexer().getKind() == AsmToken::Pipe) { + Parser.Lex(); + Abs = true; + } + switch(getLexer().getKind()) { case AsmToken::Integer: { + SMLoc S = Parser.getTok().getLoc(); int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(IntVal)); + APInt IntVal32(32, IntVal); + if (IntVal32.getSExtValue() != IntVal) { + Error(S, "invalid immediate: only 32-bit values are legal"); + return MatchOperand_ParseFail; + } + + IntVal = IntVal32.getSExtValue(); + if (Negate) + IntVal *= -1; + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); return MatchOperand_Success; } + case AsmToken::Real: { + // FIXME: We should emit an error if a double-precision floating-point + // value is used. I'm not sure the best way to detect this. + SMLoc S = Parser.getTok().getLoc(); + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + + APFloat F((float)BitsToDouble(IntVal)); + if (Negate) + F.changeSign(); + Operands.push_back( + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S)); + return MatchOperand_Success; + } + case AsmToken::Identifier: { + SMLoc S, E; + unsigned RegNo; + if (!ParseRegister(RegNo, S, E)) { + + bool HasModifiers = operandsHaveModifiers(Operands); + unsigned Modifiers = 0; + + if (Negate) + Modifiers |= 0x1; + + if (Abs) { + if (getLexer().getKind() != AsmToken::Pipe) + return MatchOperand_ParseFail; + Parser.Lex(); + Modifiers |= 0x2; + } + + if (Modifiers && !HasModifiers) { + // We are adding a modifier to src1 or src2 and previous sources + // don't have modifiers, so we need to go back and add empty modifiers + // for each previous source. + for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1; + --PrevRegIdx) { + + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]); + RegOp.setModifiers(0); + } + } + + + Operands.push_back(AMDGPUOperand::CreateReg( + RegNo, S, E, getContext().getRegisterInfo())); + + if (HasModifiers || Modifiers) { + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]); + RegOp.setModifiers(Modifiers); + + } + } else { + Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(), + S)); + Parser.Lex(); + } + return MatchOperand_Success; + } default: return MatchOperand_NoMatch; } @@ -213,22 +669,282 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + + // Clear any forced encodings from the previous instruction. + setForcedEncodingSize(0); + + if (Name.endswith("_e64")) + setForcedEncodingSize(64); + else if (Name.endswith("_e32")) + setForcedEncodingSize(32); + // Add the instruction mnemonic Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); - if (getLexer().is(AsmToken::EndOfStatement)) - return false; + while (!getLexer().is(AsmToken::EndOfStatement)) { + AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); + + // Eat the comma or space if there is one. 
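The immediate handling just parsed enforces two hardware rules: literals must fit in 32 bits (floats are stored by their 32-bit pattern), and the separate isInlineImm predicate earlier in the operand class decides whether a value can be encoded inline instead of occupying the literal slot, namely integers in [-16, 64] or one of a fixed set of float constants. A standalone version of that predicate (the 0.5*pi value noted in the TODO is omitted, as it is for pre-VI targets):

#include <cstdint>
#include <cstring>

// True when a 32-bit operand can use an inline constant encoding.
bool isInlineImm32(int64_t Val) {
  if (Val >= -16 && Val <= 64)
    return true;
  float F;
  uint32_t Bits = static_cast<uint32_t>(Val);
  std::memcpy(&F, &Bits, sizeof(F));  // reinterpret the low 32 bits
  return F == 0.0f || F == 0.5f || F == -0.5f || F == 1.0f || F == -1.0f ||
         F == 2.0f || F == -2.0f || F == 4.0f || F == -4.0f;
}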
+ if (getLexer().is(AsmToken::Comma)) + Parser.Lex(); - AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); - switch (Res) { - case MatchOperand_Success: return false; - case MatchOperand_ParseFail: return Error(NameLoc, - "Failed parsing operand"); - case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand"); + switch (Res) { + case MatchOperand_Success: break; + case MatchOperand_ParseFail: return Error(getLexer().getLoc(), + "failed parsing operand."); + case MatchOperand_NoMatch: return Error(getLexer().getLoc(), + "not a valid operand."); + } } - return true; + + // Once we reach end of statement, continue parsing so we can add default + // values for optional arguments. + AMDGPUAsmParser::OperandMatchResultTy Res; + while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) { + if (Res != MatchOperand_Success) + return Error(getLexer().getLoc(), "failed parsing operand."); + } + return false; +} + +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default) { + + // We are at the end of the statement, and this is a default argument, so + // use a default value. + if (getLexer().is(AsmToken::EndOfStatement)) { + Int = Default; + return MatchOperand_Success; + } + + switch(getLexer().getKind()) { + default: return MatchOperand_NoMatch; + case AsmToken::Identifier: { + StringRef OffsetName = Parser.getTok().getString(); + if (!OffsetName.equals(Prefix)) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + if (getParser().parseAbsoluteExpression(Int)) + return MatchOperand_ParseFail; + break; + } + } + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + + SMLoc S = Parser.getTok().getLoc(); + int64_t Offset = 0; + + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset); + if (Res != MatchOperand_Success) + return Res; + + Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy)); + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + int64_t Bit = 0; + SMLoc S = Parser.getTok().getLoc(); + + // We are at the end of the statement, and this is a default argument, so + // use a default value. 
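parseIntWithPrefix and parseNamedBit, whose bodies surround this point, implement a small grammar for optional operands: either a name:integer pair such as offset:16, or a bare flag that can be negated with a no prefix (gds / nogds). Stripped of the MCAsmLexer plumbing, the same grammar can be shown on plain strings; the sketch names simply echo the methods above:

#include <cstdint>
#include <cstdlib>
#include <string>

// Parses "name:<int>" from Text. Returns false when the prefix is absent,
// mirroring MatchOperand_NoMatch (the caller then supplies the default).
bool parseIntWithPrefixSketch(const std::string &Text,
                              const std::string &Name, int64_t &Value) {
  if (Text.compare(0, Name.size(), Name) != 0 ||
      Text.size() <= Name.size() || Text[Name.size()] != ':')
    return false;
  Value = std::strtoll(Text.c_str() + Name.size() + 1, nullptr, 0);
  return true;
}

// Parses a named bit: "glc" sets it, "noglc" clears it.
bool parseNamedBitSketch(const std::string &Tok, const std::string &Name,
                         int64_t &Bit) {
  if (Tok == Name)        { Bit = 1; return true; }
  if (Tok == "no" + Name) { Bit = 0; return true; }
  return false;
}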
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + switch(getLexer().getKind()) { + case AsmToken::Identifier: { + StringRef Tok = Parser.getTok().getString(); + if (Tok == Name) { + Bit = 1; + Parser.Lex(); + } else if (Tok.startswith("no") && Tok.endswith(Name)) { + Bit = 0; + Parser.Lex(); + } else { + return MatchOperand_NoMatch; + } + break; + } + default: + return MatchOperand_NoMatch; + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy)); + return MatchOperand_Success; +} + +static bool operandsHasOptionalOp(const OperandVector &Operands, + const OptionalOperand &OOp) { + for (unsigned i = 0; i < Operands.size(); i++) { + const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]); + if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) || + (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name)) + return true; + + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + for (const OptionalOperand &Op : OptionalOps) { + if (operandsHasOptionalOp(Operands, Op)) + continue; + AMDGPUAsmParser::OperandMatchResultTy Res; + int64_t Value; + if (Op.IsBit) { + Res = parseNamedBit(Op.Name, Operands, Op.Type); + if (Res == MatchOperand_NoMatch) + continue; + return Res; + } + + Res = parseIntWithPrefix(Op.Name, Value, Op.Default); + + if (Res == MatchOperand_NoMatch) + continue; + + if (Res != MatchOperand_Success) + return Res; + + if (Op.ConvertResult && !Op.ConvertResult(Value)) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type)); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +//===----------------------------------------------------------------------===// +// ds +//===----------------------------------------------------------------------===// + +static const OptionalOperand DSOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +static const OptionalOperand DSOptionalOpsOff01 [] = { + {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr}, + {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOps, Operands); +} +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOpsOff01, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + AMDGPUAsmParser::OperandMatchResultTy Res = + parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); + if (Res == MatchOperand_NoMatch) { + Operands.push_back(AMDGPUOperand::CreateImm(0, S, + AMDGPUOperand::ImmTyOffset)); + Res = MatchOperand_Success; + } + return Res; +} + +bool AMDGPUOperand::isDSOffset() const { + return isImm() && isUInt<16>(getImm()); +} + +bool AMDGPUOperand::isDSOffset01() const { + return isImm() && isUInt<8>(getImm()); +} + +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, + const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0]; + unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1]; + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + + ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0 + ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1 + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 } +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + bool GDSOnly = false; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + if (Op.isToken() && Op.getToken() == "gds") { + GDSOnly = true; + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset + + if (!GDSOnly) { + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + } + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 +} + + //===----------------------------------------------------------------------===// // s_waitcnt //===----------------------------------------------------------------------===// @@ -283,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { // expcnt [6:4] // lgkmcnt [10:8] int64_t CntVal = 0x77f; + SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { default: return MatchOperand_ParseFail; @@ -299,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(CntVal)); + Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S)); return MatchOperand_Success; } @@ -307,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +//===----------------------------------------------------------------------===// +// sopp branch targets +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + switch (getLexer().getKind()) { + default: return MatchOperand_ParseFail; + case AsmToken::Integer: { + int64_t Imm; + if (getParser().parseAbsoluteExpression(Imm)) + return MatchOperand_ParseFail; + Operands.push_back(AMDGPUOperand::CreateImm(Imm, S)); + return MatchOperand_Success; + } + + case AsmToken::Identifier: + Operands.push_back(AMDGPUOperand::CreateExpr( + MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol( + Parser.getTok().getString()), getContext()), S)); + Parser.Lex(); + return MatchOperand_Success; + } +} + +//===----------------------------------------------------------------------===// +// mubuf +//===----------------------------------------------------------------------===// + +static const OptionalOperand MubufOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"glc", 
AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) { + return parseOptionalOps(MubufOptionalOps, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOffset(OperandVector &Operands) { + return parseIntWithPrefix("offset", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseGLC(OperandVector &Operands) { + return parseNamedBit("glc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSLC(OperandVector &Operands) { + return parseNamedBit("slc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseTFE(OperandVector &Operands) { + return parseNamedBit("tfe", Operands); +} + +bool AMDGPUOperand::isMubufOffset() const { + return isImm() && isUInt<12>(getImm()); +} + +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst, + const OperandVector &Operands) { + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + assert(OptionalIdx.size() == 4); + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC]; + unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC]; + unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE]; + + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1); +} + +//===----------------------------------------------------------------------===// +// mimg +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDMask(OperandVector &Operands) { + return parseIntWithPrefix("dmask", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) { + return parseNamedBit("unorm", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseR128(OperandVector &Operands) { + return parseNamedBit("r128", Operands); +} + +//===----------------------------------------------------------------------===// +// vop3 +//===----------------------------------------------------------------------===// + +static bool ConvertOmodMul(int64_t &Mul) { + if (Mul != 1 && Mul != 2 && Mul != 4) + return false; + + Mul >>= 1; + return true; +} + +static bool ConvertOmodDiv(int64_t &Div) { + if (Div == 1) { + Div = 0; + return true; + } + + if (Div == 2) { + Div = 3; + return true; + } + + return false; +} + +static const OptionalOperand VOP3OptionalOps [] = { + {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, + {"mul", AMDGPUOperand::ImmTyOMod, false, 1, 
ConvertOmodMul}, + {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, +}; + +static bool isVOP3(OperandVector &Operands) { + if (operandsHaveModifiers(Operands)) + return true; + + AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]); + + if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID)) + return true; + + if (Operands.size() >= 5) + return true; + + if (Operands.size() > 3) { + AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]); + if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) || + Src1Op.isRegClass(AMDGPU::SReg_64RegClassID))) + return true; + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { + + // The value returned by this function may change after parsing + // an operand so store the original value here. + bool HasModifiers = operandsHaveModifiers(Operands); + + bool IsVOP3 = isVOP3(Operands); + if (HasModifiers || IsVOP3 || + getLexer().isNot(AsmToken::EndOfStatement) || + getForcedEncodingSize() == 64) { + + AMDGPUAsmParser::OperandMatchResultTy Res = + parseOptionalOps(VOP3OptionalOps, Operands); + + if (!HasModifiers && Res == MatchOperand_Success) { + // We have added a modifier operation, so we need to make sure all + // previous register operands have modifiers + for (unsigned i = 2, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isReg()) + Op.setModifiers(0); + } + } + return Res; + } + return MatchOperand_NoMatch; +} + +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); + unsigned i = 2; + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + if (operandsHaveModifiers(Operands)) { + for (unsigned e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + if (Op.isRegWithInputMods()) { + ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2); + continue; + } + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp]; + unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod]; + + ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1); + } else { + for (unsigned e = Operands.size(); i != e; ++i) + ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1); + } +} + /// Force static initialization. 
extern "C" void LLVMInitializeR600AsmParser() { RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index d62fd3f..279c3eb 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -20,7 +20,7 @@ using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { OS.flush(); printInstruction(MI, OS); @@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset0:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset0:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset1:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset1:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, @@ -123,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, O << " tfe"; } -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O, + const MCRegisterInfo &MRI) { switch (reg) { case AMDGPU::VCC: O << "vcc"; @@ -293,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, break; default: - printRegOperand(Op.getReg(), O); + printRegOperand(Op.getReg(), O, MRI); break; } } else if (Op.isImm()) { diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 5289718..14fb511 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -29,7 +29,10 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + static void printRegOperand(unsigned RegNo, raw_ostream &O, + const MCRegisterInfo &MRI); private: void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index d0c634f..f33e692 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -24,7 +24,7 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: - AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } + AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. 
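The raw_ostream to raw_pwrite_stream migration that starts here and continues through the object-writer creation functions below is not cosmetic: an object writer has to patch bytes it has already emitted, e.g. a size or offset field that is only known once a section body has been streamed out, and raw_pwrite_stream adds exactly that positioned-write primitive on top of raw_ostream. A minimal sketch of the pattern, using only the public tell()/pwrite() interface; the function and the placeholder layout are invented for illustration, and the raw byte copy ignores endianness:

#include "llvm/Support/raw_ostream.h"
#include <cstdint>

// Reserve a 4-byte size field, stream the payload, then patch the real
// size back into the reserved slot. Only raw_pwrite_stream can do the
// final step; a plain raw_ostream is forward-only.
static void emitWithPatchedSize(llvm::raw_pwrite_stream &OS) {
  uint64_t SizeFieldOffset = OS.tell();
  uint32_t Placeholder = 0;
  OS.write(reinterpret_cast<const char *>(&Placeholder), sizeof(Placeholder));

  uint64_t BodyBegin = OS.tell();
  OS << "section payload...";                      // emit the body
  uint32_t Size = uint32_t(OS.tell() - BodyBegin);

  // Positioned write over bytes that were already emitted.
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size),
            SizeFieldOffset);
}

That need to revisit already-written output is presumably why the MCObjectWriter constructor above and the createObjectWriter/createAMDGPUELFObjectWriter signatures below all tighten their parameter type in this patch.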
@@ -131,7 +131,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAMDGPUELFObjectWriter(OS); } }; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 5fb94d5..59f45ff 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -33,7 +33,7 @@ protected: AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() : MCELFObjectTargetWriter(false, 0, 0, false) { } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) { +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) { MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); return createELFObjectWriter(MOTW, OS, true); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index fb2deef..7b280a4 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -64,12 +64,11 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, +static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new AMDGPUInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 23f0196..9a7548e 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -28,6 +28,7 @@ class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheAMDGPUTarget; @@ -44,7 +45,7 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); +MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 760aa37..24f2b6d 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -49,7 +49,7 @@ public: MCContext &ctx) : MCII(mcii), MRI(mri), Ctx(ctx) { } - ~SIMCCodeEmitter() { } + ~SIMCCodeEmitter() override {} /// \brief Encode the instruction and write it to the OS. 
void EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a34e2dc..b6b7067 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1811,7 +1811,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } @@ -1819,7 +1819,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { SwizzleRemap.clear(); BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 291fb04..7126c82 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -683,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; // TODO: Do these actually match the regular fmin/fmax behavior? def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; +// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx +// DX10 min/max returns the other operand if one operand is NaN, +// which matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic +def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>; +def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string.
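The dyn_cast-to-cast changes in R600ISelLowering.cpp above, and the matching ones in R600TextureIntrinsicsReplacer.cpp below, fix a latent bug pattern rather than style: dyn_cast<T> returns null when the value is not actually a T, so dereferencing its result unchecked can crash with no diagnostic, while cast<T> documents the type invariant and asserts it in asserts-enabled builds. A minimal sketch of the idiom, with a hypothetical two-level hierarchy standing in for SDNode/ConstantSDNode:

#include "llvm/Support/Casting.h"
#include <cstdint>

struct Node {
  enum Kind { ConstantKind, OtherKind };
  Kind K;
  explicit Node(Kind K) : K(K) {}
};

struct ConstantNode : Node {
  uint64_t Value;
  explicit ConstantNode(uint64_t V) : Node(ConstantKind), Value(V) {}
  static bool classof(const Node *N) { return N->K == ConstantKind; }
};

uint64_t getConstantValue(Node *N) {
  // Latent bug: dyn_cast<> yields nullptr on a type mismatch, and an
  // unchecked dereference turns that into a silent crash:
  //   return llvm::dyn_cast<ConstantNode>(N)->Value;

  // cast<> states the invariant and asserts it in +Asserts builds:
  return llvm::cast<ConstantNode>(N)->Value;
}

// When the type is genuinely uncertain, test before use instead:
//   if (auto *C = llvm::dyn_cast<ConstantNode>(N)) { /* use C->Value */ }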
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 419ec8b..2fc7b02 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(2); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; @@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(5); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 79f6532..d39ab3f 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken); + Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L); void handleLoop(BranchInst *Term); @@ -207,7 +207,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken, + llvm::Loop *L) { if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); @@ -223,7 +224,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } Phi->setIncomingValue(i, BoolFalse); - Value *PhiArg = handleLoopCondition(Incoming, Broken); + Value *PhiArg = handleLoopCondition(Incoming, Broken, L); NewPhi->addIncoming(PhiArg, From); } @@ -253,7 +254,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); - TerminatorInst *Insert = Parent->getTerminator(); + Instruction *Insert; + if (L->contains(Inst)) { + Insert = Parent->getTerminator(); + } else { + Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); + } Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Insert); @@ -265,14 +271,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) /// \brief Handle a back edge (loop) void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { + BasicBlock *BB = Term->getParent(); + llvm::Loop *L = LI->getLoopFor(BB); BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - Value *Arg = handleLoopCondition(Cond, Broken); + Value *Arg = handleLoopCondition(Cond, Broken, L); - BasicBlock *BB = Term->getParent(); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bd0c3c2..43507d8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -76,8 +76,6 @@ 
SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::FCOS, MVT::f32, Custom); - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -2089,3 +2087,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), cast<RegisterSDNode>(VReg)->getReg(), VT); } + +//===----------------------------------------------------------------------===// +// SI Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass *> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const { + if (Constraint == "r") { + switch(VT.SimpleTy) { + default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); + case MVT::i64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case MVT::i32: + return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + } + } + + if (Constraint.size() > 1) { + const TargetRegisterClass *RC = nullptr; + if (Constraint[1] == 'v') { + RC = &AMDGPU::VGPR_32RegClass; + } else if (Constraint[1] == 's') { + RC = &AMDGPU::SGPR_32RegClass; + } + + if (RC) { + unsigned Idx = std::atoi(Constraint.substr(2).c_str()); + if (Idx < RC->getNumRegs()) + return std::make_pair(RC->getRegister(Idx), RC); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 92f5847..a6bc7c6 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -113,6 +113,10 @@ public: MachineSDNode *buildScratchRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const; + + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 4167590..bc693c3 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : let AddedComplexity = -1000; let VOP3 = 1; + let VALU = 1; + + let AsmMatchConverter = "cvtVOP3"; + let isCodeGenOnly = 0; + int Size = 8; } @@ -181,6 +186,19 @@ class SOPKe <bits<5> op> : Enc32 { let Inst{31-28} = 0xb; //encoding } +class SOPK64e <bits<5> op> : Enc64 { + bits <7> sdst = 0; + bits <16> simm16; + bits <32> imm; + + let Inst{15-0} = simm16; + let Inst{22-16} = sdst; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; + + let Inst{63-32} = imm; +} + class SOPPe <bits<7> op> : Enc32 { bits <16> simm16; @@ -208,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP1 = 1; } @@ -218,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP2 = 1; @@ -233,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let SALU = 1; let SOPC = 1; + let isCodeGenOnly = 0; let 
UseNamedOperandTable = 1; } @@ -550,10 +571,14 @@ let Uses = [EXEC] in { class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP1Common <outs, ins, asm, pattern>, - VOP1e<op>; + VOP1e<op> { + let isCodeGenOnly = 0; +} class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - VOP2Common <outs, ins, asm, pattern>, VOP2e<op>; + VOP2Common <outs, ins, asm, pattern>, VOP2e<op> { + let isCodeGenOnly = 0; +} class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : VOPCCommon <ins, asm, pattern>, VOPCe <op>; @@ -586,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let mayStore = 1; let hasSideEffects = 0; + let AsmMatchConverter = "cvtDS"; let SchedRW = [WriteLDS]; } @@ -598,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let AsmMatchConverter = "cvtMubuf"; let SchedRW = [WriteVMEM]; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d603ecb..076a0ce 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -6,6 +6,15 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +def isSICI : Predicate< + "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" +>, AssemblerPredicate<"FeatureGCN1Encoding">; +def isCI : Predicate<"Subtarget->getGeneration() " + ">= AMDGPUSubtarget::SEA_ISLANDS">; +def isVI : Predicate < + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGCN3Encoding">; class vop { field bits<9> SI3; @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> { let MIOperandInfo = (ops i32:$ptr, i32imm:$index); } +def SoppBrTarget : AsmOperandClass { + let Name = "SoppBrTarget"; + let ParserMethod = "parseSOppBrTarget"; +} + def sopp_brtarget : Operand<OtherVT> { let EncoderMethod = "getSOPPBrEncoding"; let OperandType = "OPERAND_PCREL"; + let ParserMatchClass = SoppBrTarget; } include "SIInstrFormats.td" include "VIInstrFormats.td" +def MubufOffsetMatchClass : AsmOperandClass { + let Name = "MubufOffset"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass { + let Name = "DSOffset"#parser; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset"; +} + +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">; +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">; + +def DSOffset01MatchClass : AsmOperandClass { + let Name = "DSOffset1"; + let ParserMethod = "parseDSOff01OptionalOps"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset01"; +} + +class GDSBaseMatchClass <string parser> : AsmOperandClass { + let Name = "GDS"#parser; + let PredicateMethod = "isImm"; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; +} + +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">; +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; + +def GLCMatchClass : AsmOperandClass { + let Name = "GLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def SLCMatchClass : AsmOperandClass { + let Name = "SLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; 
+ let RenderMethod = "addImmOperands"; +} + +def TFEMatchClass : AsmOperandClass { + let Name = "TFE"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def OModMatchClass : AsmOperandClass { + let Name = "OMod"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def ClampMatchClass : AsmOperandClass { + let Name = "Clamp"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def offen : Operand<i1> { @@ -254,35 +337,52 @@ def addr64 : Operand<i1> { } def mbuf_offset : Operand<i16> { let PrintMethod = "printMBUFOffset"; + let ParserMatchClass = MubufOffsetMatchClass; } -def ds_offset : Operand<i16> { +class ds_offset_base <AsmOperandClass mc> : Operand<i16> { let PrintMethod = "printDSOffset"; + let ParserMatchClass = mc; } +def ds_offset : ds_offset_base <DSOffsetMatchClass>; +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>; + def ds_offset0 : Operand<i8> { let PrintMethod = "printDSOffset0"; + let ParserMatchClass = DSOffset01MatchClass; } def ds_offset1 : Operand<i8> { let PrintMethod = "printDSOffset1"; + let ParserMatchClass = DSOffset01MatchClass; } -def gds : Operand <i1> { +class gds_base <AsmOperandClass mc> : Operand <i1> { let PrintMethod = "printGDS"; + let ParserMatchClass = mc; } +def gds : gds_base <GDSMatchClass>; + +def gds01 : gds_base <GDS01MatchClass>; + def glc : Operand <i1> { let PrintMethod = "printGLC"; + let ParserMatchClass = GLCMatchClass; } def slc : Operand <i1> { let PrintMethod = "printSLC"; + let ParserMatchClass = SLCMatchClass; } def tfe : Operand <i1> { let PrintMethod = "printTFE"; + let ParserMatchClass = TFEMatchClass; } def omod : Operand <i32> { let PrintMethod = "printOModSI"; + let ParserMatchClass = OModMatchClass; } def ClampMod : Operand <i1> { let PrintMethod = "printClampSI"; + let ParserMatchClass = ClampMatchClass; } } // End OperandType = "OPERAND_IMMEDIATE" @@ -392,12 +492,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isSICI]; +} class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isVI]; +} multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm, list<dag> pattern> { @@ -473,12 +579,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> { def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst), @@ -540,12 +650,28 @@ class SOPK_Pseudo <string opName, 
dag outs, dag ins, list<dag> pattern> : class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; +} class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; +} + +multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm, + string asm = opName#opAsm> { + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK_Real_si <op, opName, outs, ins, asm>; + + def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>; + +} multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0), @@ -562,13 +688,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst), (ins SReg_32:$src0, u16imm:$src1), pattern>; - def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + let DisableEncoding = "$dst" in { + def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; - def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; + } } +multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m < + op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16), + " $sdst, $simm16" +>; + +multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, + string argAsm, string asm = opName#argAsm> { + + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK <outs, ins, asm, []>, + SOPK64e <op.SI>, + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; + } + + def _vi : SOPK <outs, ins, asm, []>, + SOPK64e <op.VI>, + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; + } +} //===----------------------------------------------------------------------===// // SMRD classes //===----------------------------------------------------------------------===// @@ -584,13 +736,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMRDe <op, imm>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMEMe_vi <op, imm>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm, list<dag> pattern> { @@ -629,8 +785,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass, def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> { let PrintMethod = "printOperandAndMods"; } + +def InputModsMatchClass : AsmOperandClass { + let Name = "RegWithInputMods"; +} + def InputModsNoDefault : Operand <i32> { let PrintMethod = "printOperandAndMods"; + let ParserMatchClass = 
InputModsMatchClass; } class getNumSrcArgs<ValueType Src1, ValueType Src2> { @@ -838,7 +1000,8 @@ class AtomicNoRet <string noRetOp, bit isRet> { class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP1Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr <opName#"_e32", SISubtarget.NONE> { + SIMCInstr <opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; @@ -873,18 +1036,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP2Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> : VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; + SIMCInstr <opName#"_e32", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> : - VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.VI>; + VOP2 <op.VI, outs, ins, opName#asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp> { @@ -930,7 +1098,8 @@ class VOP3DisableModFields <bit HasSrc0Mods, class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP3Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e64", SISubtarget.NONE> { + SIMCInstr<opName#"_e64", SISubtarget.NONE>, + MnemonicAlias<opName#"_e64", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -938,22 +1107,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, int NumSrcArgs, bit HasMods = 1> { @@ -1095,12 +1272,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins, } def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + } def _vi : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op.VI3>, VOP3DisableFields <1, 0, 0>, - SIMCInstr <opName, SISubtarget.VI>; + SIMCInstr <opName, 
SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + } } multiclass VOP1_Helper <vop1 op, string opName, dag outs, @@ -1253,7 +1434,8 @@ let isCodeGenOnly = 0 in { class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOPCCommon <ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -1504,7 +1686,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, DSe <op>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let isCodeGenOnly = 0; +} class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, @@ -1518,6 +1702,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm bits<16> offset; let offset0 = offset{7-0}; let offset1 = offset{15-8}; + let isCodeGenOnly = 0; } class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : @@ -1545,12 +1730,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1, - gds:$gds, M0Reg:$m0), + gds01:$gds, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let data0 = 0, data1 = 0 in { + let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1574,12 +1759,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs), dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1, - ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0), + ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0), string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let vdst = 0 in { + let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1653,7 +1838,7 @@ multiclass DS_0A_RET <bits<8> op, string opName, multiclass DS_1A_RET_GDS <bits<8> op, string opName, dag outs = (outs VGPR_32:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0), + dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; @@ -1762,6 +1947,20 @@ class mubuf <bits<7> si, bits<7> vi = si> { field bits<7> VI = vi; } +let isCodeGenOnly = 0 in { + +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { + let lds = 0; +} + +} // End let isCodeGenOnly = 0 + +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> { + let lds = 0; +} + class MUBUFAddr64Table <bit is_addr64, string suffix = ""> { bit IsAddr64 = is_addr64; string OpName = NAME # suffix; @@ -1805,7 +2004,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, def "" : MUBUF_Pseudo 
<opName, outs, ins, pattern>, MUBUFAddr64Table <0>; - let addr64 = 0 in { + let addr64 = 0, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1818,7 +2017,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <1>; - let addr64 = 1 in { + let addr64 = 1, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1826,11 +2025,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, // for VI appropriately. } -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { - let lds = 0; -} - multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins, string asm, list<dag> pattern, bit is_return> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 95b2470..91e8c8c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -27,18 +27,10 @@ def SendMsgImm : Operand<i32> { } def isGCN : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; -def isSICI : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" ->; -def isCI : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SEA_ISLANDS">; -def isVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" ->; def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; @@ -242,9 +234,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", >; } // End Defs = [SCC] -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>; let Uses = [SCC] in { + defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>; defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>; } // End Uses = [SCC] @@ -387,6 +379,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", >; */ +defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>; defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>; defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>; defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>; @@ -400,18 +393,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>; defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>; } // End isCompare = 1 -let isCommutable = 1 in { - let Defs = [SCC], isCommutable = 1 in { - defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>; - } - defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>; +let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", + Constraints = "$sdst = $src0" in { + defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>; + defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>; } -//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>; +defm S_CBRANCH_I_FORK : SOPK_m < + sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs), + (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>; -defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>; -defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", 
[]>; -//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>; +defm S_SETREG_B32 : SOPK_m < + sopk<0x13, 0x12>, "s_setreg_b32", (outs), + (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; +// FIXME: Not on SI? +//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>; +defm S_SETREG_IMM32_B32 : SOPK_IMM32 < + sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs), + (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16" +>; //===----------------------------------------------------------------------===// // SOPP Instructions @@ -1630,7 +1632,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; - defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 7bb5dc2..f289014 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -66,7 +66,7 @@ foreach Index = 0-255 in { //===----------------------------------------------------------------------===// // SGPR 32-bit registers -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Register classes used as source and destination //===----------------------------------------------------------------------===// +class RegImmMatcher<string name> : AsmOperandClass { + let Name = name; + let RenderMethod = "addRegOrImmOperands"; +} + // Special register classes for predicates and the M0 register def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> { let CopyCost = -1; // Theoretically it is possible to read from SCC, @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI) >; @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> { // SSrc_* Operands with an SGPR or a 32-bit immediate //===----------------------------------------------------------------------===// -def SSrc_32 : RegImmOperand<SReg_32>; +def SSrc_32 : RegImmOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SSrc32">; +} -def SSrc_64 : RegImmOperand<SReg_64>; +def SSrc_64 : RegImmOperand<SReg_64> { + let ParserMatchClass = RegImmMatcher<"SSrc64">; +} //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// -def SCSrc_32 : RegInlineOperand<SReg_32>; +def SCSrc_32 : RegInlineOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SCSrc32">; +} 
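The RegImmMatcher class defined above is only the declarative half: for each operand that names it, tblgen emits matcher glue that, on a successful match, invokes the RenderMethod ("addRegOrImmOperands") on the parsed operand to append MCInst operands. A plausible shape for that hook, modeled on the addRegOperands/addImmOperands calls visible earlier in this patch; the ParsedOp stand-in and its fields are assumptions for illustration, not the literal AMDGPUOperand:

#include "llvm/MC/MCInst.h"
#include <cstdint>

// Minimal stand-in for the parser's operand class; the real one also
// carries modifiers, source locations, and an immediate-type tag.
struct ParsedOp {
  bool IsReg;
  unsigned Reg;
  int64_t Imm;
};

// Shape of an "addRegOrImmOperands" render method: emit exactly one
// MCInst operand, register or literal, whichever the parser saw.
static void addRegOrImmOperands(const ParsedOp &Op, llvm::MCInst &Inst) {
  if (Op.IsReg)
    Inst.addOperand(llvm::MCOperand::CreateReg(Op.Reg));
  else
    Inst.addOperand(llvm::MCOperand::CreateImm(Op.Imm));
}

That single render hook is what lets SSrc_32 and the related operands accept either an SGPR or a bare literal in the same operand slot.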
//===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>; def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; -def VSrc_32 : RegImmOperand<VS_32>; +def VSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc32">; +} -def VSrc_64 : RegImmOperand<VS_64>; +def VSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc64">; +} //===----------------------------------------------------------------------===// // VCSrc_* Operands with an SGPR, VGPR or an inline constant //===----------------------------------------------------------------------===// -def VCSrc_32 : RegInlineOperand<VS_32>; +def VCSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc32">; +} -def VCSrc_64 : RegInlineOperand<VS_64>; +def VCSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc64">; +} diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 27bbf4f..591ce85 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -104,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) { SmallVector <Type*, 8> Types; bool NeedToReplace = false; Function *F = I.getCalledFunction(); - std::string Name = F->getName().str(); + std::string Name = F->getName(); for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 5975a51..b6eebb0 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -34,7 +34,7 @@ namespace Sparc { #define PRINT_ALIAS_INSTR #include "SparcGenAsmWriter.inc" -bool SparcInstPrinter::isV9() const { +bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & Sparc::FeatureV9) != 0; } @@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const } void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) -{ - if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O)) - printInstruction(MI, O); + StringRef Annot, const MCSubtargetInfo &STI) { + if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } -bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) -{ +bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { switch (MI->getOpcode()) { default: return false; case SP::JMPLrr: @@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::O7: O << "\tretl"; return true; } } - O << "\tjmp "; printMemOperand(MI, 1, O); + O << "\tjmp "; printMemOperand(MI, 1, STI, O); return true; case SP::O7: // call $addr - O << "\tcall "; printMemOperand(MI, 1, O); + O << "\tcall "; printMemOperand(MI, 1, STI, O); 
return true; } } case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ: case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: { - if (isV9() + if (isV9(STI) || (MI->getNumOperands() != 3) || (!MI->getOperand(0).isReg()) || (MI->getOperand(0).getReg() != SP::FCC0)) @@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::V9FCMPED: O << "\tfcmped "; break; case SP::V9FCMPEQ: O << "\tfcmpeq "; break; } - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); return true; } } } void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand (opNum); if (MO.isReg()) { @@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, } void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ - printOperand(MI, opNum, O); + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, STI, O); // If this is an ADD operand, emit it like normal operands. if (Modifier && !strcmp(Modifier, "arith")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); return; } const MCOperand &MO = MI->getOperand(opNum+1); @@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); } void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { int CC = (int)MI->getOperand(opNum).getImm(); switch (MI->getOpcode()) { default: break; @@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, } bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h index c96d5ad..0b01b88 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h @@ -22,32 +22,36 @@ namespace llvm { class MCOperand; class SparcInstPrinter : public MCInstPrinter { - const MCSubtargetInfo &STI; public: - SparcInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &sti) - : MCInstPrinter(MAI, MII, MRI), STI(sti) {} + SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS); - bool isV9() const; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool isV9(const MCSubtargetInfo &STI) const; // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printOperand(const MCInst *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS, - const char *Modifier = nullptr); - void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS); - bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - + void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS, const char *Modifier = nullptr); + void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &OS); }; } // end namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index dcd81e3..4abb6b8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -244,7 +244,7 @@ namespace { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); return createSparcELFObjectWriter(OS, is64Bit(), OSABI); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 5ba82f1..98ba7e6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -26,7 +26,8 @@ namespace { Is64Bit ? 
ELF::EM_SPARCV9 : ELF::EM_SPARC, /*HasRelocationAddend*/ true) {} - virtual ~SparcELFObjectWriter() {} + ~SparcELFObjectWriter() override {} + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -104,9 +105,8 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_SPARC_NONE; } -MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI) { +MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 598856f..b447ab3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -38,7 +38,7 @@ class SparcMCCodeEmitter : public MCCodeEmitter { public: SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {} - ~SparcMCCodeEmitter() {} + ~SparcMCCodeEmitter() override {} void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 630ed1b..7895404 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -134,13 +134,12 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, return new SparcTargetAsmStreamer(S, OS); } -static MCInstPrinter *createSparcMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new SparcInstPrinter(MAI, MII, MRI, STI); +static MCInstPrinter *createSparcMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new SparcInstPrinter(MAI, MII, MRI); } extern "C" void LLVMInitializeSparcTargetMC() { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index d2ec991..5f38b12 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -26,6 +26,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_pwrite_stream; class raw_ostream; extern Target TheSparcTarget; @@ -38,8 +39,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, +MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint8_t OSABI); } // End llvm namespace diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 3159a46..c34122e 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def SparcAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; +} + def Sparc : Target { // Pull in Instruction Info: let InstructionSet = SparcInstrInfo; let AssemblyParsers = 
[SparcAsmParser]; + let AssemblyWriters = [SparcAsmWriter]; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 0439f9d..56290e2 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -58,7 +58,6 @@ namespace { void EmitFunctionBodyStart() override; void EmitInstruction(const MachineInstr *MI) override; - void EmitEndOfAsmFile(Module &M) override; static const char *getRegisterName(unsigned RegNo) { return SparcInstPrinter::getRegisterName(RegNo); @@ -442,23 +441,6 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Generate stubs for global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataSection()); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index c8b0570..5b964af 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -727,7 +727,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, false, // isVolatile, - (Size <= 32), // AlwaysInline if size <= 32 + (Size <= 32), // AlwaysInline if size <= 32, + false, // isTailCall MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index a3a21d6..6818291 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -23,7 +23,7 @@ class SparcTargetMachine; class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: explicit SparcSelectionDAGInfo(const DataLayout &DL); - ~SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo() override; }; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 60a3912..336f037 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_target(SystemZCodeGen SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZTargetTransformInfo.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 23173bf..84400f8 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler { public: SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~SystemZDisassembler() {} + ~SystemZDisassembler() override {} DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git 
a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 996a492..cf1ee54 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -43,7 +43,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
 }
 
 void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                   StringRef Annot) {
+                                   StringRef Annot,
+                                   const MCSubtargetInfo &STI) {
   printInstruction(MI, O);
   printAnnotation(O, Annot);
 }
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 732e5fa..6f56c7b 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
   // Override MCInstPrinter.
   void printRegName(raw_ostream &O, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
 
 private:
   // Print various types of operand.
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 542aaee..6f8431d 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
 type = Library
 name = SystemZCodeGen
 parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
 add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index b79b1d8..1c3887a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -62,7 +62,7 @@ public:
     llvm_unreachable("SystemZ does not have assembler relaxation");
   }
   bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createSystemZObjectWriter(OS, OSABI);
   }
 };
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 40dc48e..8dd70b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -32,7 +32,7 @@ public:
     : MCII(mcii), Ctx(ctx) {
   }
 
-  ~SystemZMCCodeEmitter() {}
+  ~SystemZMCCodeEmitter() override {}
 
   // Override MCCodeEmitter.
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 2632518..ee1af02 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
 public:
   SystemZObjectWriter(uint8_t OSABI);
 
-  virtual ~SystemZObjectWriter();
+  ~SystemZObjectWriter() override;
 
 protected:
   // Override MCELFObjectTargetWriter.
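A change that recurs throughout this commit is createObjectWriter and the create*ObjectWriter factories taking raw_pwrite_stream instead of raw_ostream: an object writer has to go back and patch bytes it has already emitted (section sizes, offsets, relocation counts), and pwrite expresses exactly that. The following is only a minimal sketch, not code from the commit; it assumes the contemporaneous raw_fd_ostream constructor and raw_pwrite_stream::pwrite interface, and the file name and 4-byte size field are illustrative.

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
using namespace llvm;

int main() {
  std::error_code EC;
  // A file-backed raw_fd_ostream is a raw_pwrite_stream.
  raw_fd_ostream OS("demo.o", EC, sys::fs::F_None);
  if (EC)
    return 1;

  OS.write("\0\0\0\0", 4); // placeholder for a 4-byte size field
  OS << "payload";         // emit data whose size is only known afterwards

  // Back-patch the placeholder: the operation the ELF writers need, and
  // the reason these APIs now require raw_pwrite_stream.
  // (Raw host byte order; fine for a demonstration.)
  uint32_t Size = 7;
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size), 0);
  return 0;
}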
@@ -152,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index ffd05a9..ea56fb1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -172,12 +172,11 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new SystemZInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 962c950..2b2647b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -23,6 +23,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheSystemZTarget; @@ -77,7 +78,7 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers. diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 5f17edb..b3a7310 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 18e37e3..2524733 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -255,29 +255,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Triple(TM.getTargetTriple()).isOSBinFormatELF()) { - auto &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. 
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSystemZAsmPrinter() { RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index a4d5b78..7f6e823 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -38,7 +38,6 @@ public: bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; - void EmitEndOfAsmFile(Module &M) override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index a52aa25..1a58b53 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -898,6 +898,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; EVT OpcodeVT = MVT::i64; if (VT == MVT::i32 && Subtarget->hasHighWord()) { Opcode = SystemZ::RISBMux; @@ -945,9 +948,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. - if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { Opcode = SystemZ::RISBG; - + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + EVT VT = N->getValueType(0); SDValue Ops[5] = { convertTo(SDLoc(N), MVT::i64, Op0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ca8bcd..21882cb 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" #include <cctype> using namespace llvm; @@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + // No special instructions for these. - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -299,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + // Handle intrinsics. 
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -342,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -1016,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1530,6 +1596,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. @@ -1571,9 +1639,53 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. 
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1615,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Emit the comparison instruction described by C. 
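The CC-mask construction in getIntrinsicCmp above is terse: the machine condition code is a value 0 to 3, and bit (3 - c) of a four-bit mask means "branch when CC equals c". Here is a standalone model of that arithmetic, not part of the commit: the Cond enum and ccMaskFor are hypothetical stand-ins for ISD::CondCode and the in-tree logic, and unsigned shifts replace the source's -1 << n.

#include <cassert>

enum Cond { EQ, NE, LT, GE, LE, GT }; // stand-in for ISD::CondCode

// Bit (3 - c) set in the result means "branch when the condition code is c".
unsigned ccMaskFor(Cond Cnd, unsigned CC, unsigned CCValid) {
  unsigned Mask = 0;
  switch (Cnd) {
  case EQ: Mask = CC < 4 ? 1u << (3 - CC) : 0u; break;
  case NE: Mask = CC < 4 ? ~(1u << (3 - CC)) : ~0u; break;
  case LT: Mask = CC < 4 ? ~0u << (4 - CC) : ~0u; break; // CC values below CC
  case GE: Mask = CC < 4 ? ~(~0u << (4 - CC)) : 0u; break;
  case LE: Mask = CC < 4 ? ~0u << (3 - CC) : ~0u; break; // CC values up to CC
  case GT: Mask = CC < 4 ? ~(~0u << (3 - CC)) : 0u; break;
  }
  return Mask & CCValid;
}

int main() {
  // tbegin returns CC in {0..3}; CCMASK_TBEGIN covers all four bits (0xF).
  assert(ccMaskFor(EQ, 0, 0xF) == 0x8); // "cc == 0": transaction started
  assert(ccMaskFor(LT, 2, 0xF) == 0xC); // "cc < 2": CC 0 or CC 1
  assert(ccMaskFor(NE, 3, 0xF) == 0xE); // "cc != 3": everything but bit 0
  return 0;
}

With the mask in hand, the emitCmp routine below materializes the comparison itself.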
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, DAG.getConstant(C.ICmpType, MVT::i32)); @@ -1696,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); @@ -1706,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), + Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32), DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); } @@ -2100,6 +2222,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2292,6 +2415,46 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2505,6 +2668,30 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. 
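The lowerCTPOP routine above relies on POPCNT delivering a separate bit count in each byte; the shift-and-add tree then folds the eight counts into the high byte, and no step can carry across a byte boundary because every partial sum stays at or below 64. A self-contained sketch of the 64-bit case follows; it is not code from the commit, popcntPerByte and ctpop64 are hypothetical names, and __builtin_popcountll stands in for the hardware instruction.

#include <cassert>
#include <cstdint>

// Model of SystemZ POPCNT: byte B of the result holds the population
// count of byte B of the operand.
uint64_t popcntPerByte(uint64_t V) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I) {
    uint64_t Byte = (V >> (8 * I)) & 0xff;
    R |= (uint64_t)__builtin_popcountll(Byte) << (8 * I);
  }
  return R;
}

// The shift/add tree from lowerCTPOP, specialized to a full 64-bit operand.
// (The in-tree code also narrows BitSize using known-zero bits and masks
// partial sums when the type is narrower than the shift tree.)
uint64_t ctpop64(uint64_t X) {
  uint64_t Op = popcntPerByte(X);
  for (int I = 32; I >= 8; I /= 2)
    Op += Op << I; // each step doubles how many byte counts are merged
  return Op >> 56; // the grand total accumulates in the high byte
}

int main() {
  assert(ctpop64(0) == 0);
  assert(ctpop64(0xff) == 8);
  assert(ctpop64(~0ull) == 64);
  assert(ctpop64(0x8000000000000001ull) == 2);
  return 0;
}

Returning to the intrinsic plumbing: getCCResult below recovers the CC value as an i32 by inserting an IPM and shifting the result down.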
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue); + return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2542,6 +2729,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2576,6 +2765,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2616,6 +2807,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -3443,6 +3637,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR clobbers. 
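In emitTransactionBegin above, the high halfword of the TBEGIN immediate is the general-register save mask (GRSM): one bit per even/odd register pair, MSB first, so 0x8000 covers r0/r1 and 0x0100 covers r14/r15. The pass forces the r14/r15 bit (the stack pointer pair) and, when a frame pointer is in use, the r10/r11 bit, then marks every unsaved GPR as clobbered, since its value after an abort is unpredictable. A standalone sketch of that bookkeeping, with hypothetical names: adjustTBeginControl is not an in-tree function, and the printf stands in for adding implicit-def operands to the MachineInstr.

#include <cstdint>
#include <cstdio>

// GRSM bit covering each GPR; pairs (0,1), (2,3), ... share a bit.
static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100};

uint64_t adjustTBeginControl(uint64_t Control, bool HasFP) {
  Control |= GPRControlBit[15]; // always save/restore the r14/r15 pair
  if (HasFP)
    Control |= GPRControlBit[11]; // keep the frame-pointer pair alive too
  for (int I = 0; I < 16; ++I)
    if ((Control & GPRControlBit[I]) == 0)
      printf("r%d clobbered across the transaction\n", I);
  return Control;
}

int main() {
  // Arbitrary example: save only pairs r8..r15 (mask 0x0f00) and set the
  // allow-FP bit (0x0004, handled separately in the real code); r0..r7
  // then show up as clobbers.
  adjustTBeginControl(0x0f04, /*HasFP=*/false);
  return 0;
}

The loop that follows in the diff applies the same treatment to the FPRs whenever the F bit (0x0004) permits floating-point use inside the transaction.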
+ if (!NoFloat && (Control & 4) != 0) { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3684,6 +3922,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 23c62c9..56d7ef4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -87,6 +87,9 @@ enum { // the number of the register. EXTRACT_ACCESS, + // Count number of bits set in operand 0 per byte. + POPCNT, + // Wrappers around the ISD opcodes of the same name. The output and // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. @@ -143,6 +146,15 @@ enum { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns chain and glue. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns chain and glue. + TEND, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -213,6 +225,8 @@ public: EVT getSetCCResultType(LLVMContext &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -302,6 +316,7 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, @@ -312,6 +327,7 @@ private: SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. 
@@ -349,6 +365,10 @@ private:
   MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
                                        MachineBasicBlock *BB,
                                        unsigned Opcode) const;
+  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+                                          MachineBasicBlock *MBB,
+                                          unsigned Opcode,
+                                          bool NoFloat) const;
 };
 } // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c..2d3c9e2 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{15-0} = BD2;
 }
 
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+  field bits<32> SoftFail = 0;
+
+  bits<16> BD2;
+
+  let Inst{31-16} = op;
+  let Inst{15-0} = BD2;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction definitions with semantics
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5128993..3a02859 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -723,9 +723,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned Start, End;
     if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
       unsigned NewOpcode;
-      if (And.RegSize == 64)
+      if (And.RegSize == 64) {
         NewOpcode = SystemZ::RISBG;
-      else {
+        // Prefer RISBGN if available, since it does not clobber CC.
+        if (STI.hasMiscellaneousExtensions())
+          NewOpcode = SystemZ::RISBGN;
+      } else {
         NewOpcode = SystemZ::RISBMux;
         Start &= 31;
         End &= 31;
@@ -1146,17 +1149,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
 unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   switch (Opcode) {
-  case SystemZ::L: return SystemZ::LT;
-  case SystemZ::LY: return SystemZ::LT;
-  case SystemZ::LG: return SystemZ::LTG;
-  case SystemZ::LGF: return SystemZ::LTGF;
-  case SystemZ::LR: return SystemZ::LTR;
-  case SystemZ::LGFR: return SystemZ::LTGFR;
-  case SystemZ::LGR: return SystemZ::LTGR;
-  case SystemZ::LER: return SystemZ::LTEBR;
-  case SystemZ::LDR: return SystemZ::LTDBR;
-  case SystemZ::LXR: return SystemZ::LTXBR;
-  default: return 0;
+  case SystemZ::L:      return SystemZ::LT;
+  case SystemZ::LY:     return SystemZ::LT;
+  case SystemZ::LG:     return SystemZ::LTG;
+  case SystemZ::LGF:    return SystemZ::LTGF;
+  case SystemZ::LR:     return SystemZ::LTR;
+  case SystemZ::LGFR:   return SystemZ::LTGFR;
+  case SystemZ::LGR:    return SystemZ::LTGR;
+  case SystemZ::LER:    return SystemZ::LTEBR;
+  case SystemZ::LDR:    return SystemZ::LTDBR;
+  case SystemZ::LXR:    return SystemZ::LTXBR;
+  // On zEC12 we prefer to use RISBGN. But if there is a chance to
+  // actually use the condition code, we may turn it back into RISBG.
+  // Note that RISBG is not really a "load-and-test" instruction,
+  // but sets the same condition code values, so is OK to use here.
+  case SystemZ::RISBGN: return SystemZ::RISBG;
+  default: return 0;
   }
 }
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index a7f7747..820f30b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1061,6 +1061,10 @@ let Defs = [CC] in {
   def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
 }
 
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in + def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; + // Forms of RISBG that only affect one word of the destination register. // They do not set CC. let Predicates = [FeatureHighWord] in { @@ -1358,6 +1362,60 @@ let Defs = [CC] in { } //===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let hasSideEffects = 1, mayStore = 1, + usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : InstSIL<0xE560, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbegin\t$BD1, $I2", + [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; + def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + [(z_tbegin_nofloat bdaddr12only:$BD1, + imm32zx16:$I2)]>; + def TBEGINC : InstSIL<0xE561, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbeginc\t$BD1, $I2", + [(int_s390_tbeginc bdaddr12only:$BD1, + imm32zx16:$I2)]>; + } + + // Transaction End + let hasSideEffects = 1, Defs = [CC], BD2 = 0 in + def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; + + // Transaction Abort + let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in + def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), + "tabort\t$BD2", + [(int_s390_tabort bdaddr12only:$BD2)]>; + + // Nontransactional Store + let hasSideEffects = 1 in + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + let hasSideEffects = 1 in + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1, R4 = 0 in + def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), + "ppa\t$R1, $R2, $R3", []>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + 0, 1)>; +} + +//===----------------------------------------------------------------------===// // Miscellaneous Instructions. //===----------------------------------------------------------------------===// @@ -1382,6 +1440,13 @@ let Defs = [CC] in { def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; +// Population count. Counts bits set per byte. +let Predicates = [FeaturePopulationCount], Defs = [CC] in { + def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), + "popcnt\t$R1, $R2", + [(set GR64:$R1, (z_popcnt GR64:$R2))]>; +} + // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. 
def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 51ac5da..3151052 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_ZTBegin : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -121,6 +124,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; @@ -179,6 +183,15 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDTNone, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index 1594854..15614c9 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature< "Assume that the floating-point extension facility is installed" >; +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", + "Assume that the population-count facility is installed" +>; + def FeatureFastSerialization : SystemZFeature< "fast-serialization", "FastSerialization", "Assume that the fast-serialization facility is installed" @@ -50,13 +55,30 @@ def FeatureInterlockedAccess1 : SystemZFeature< >; def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; +def FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", + "Assume that the miscellaneous-extensions facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", + "Assume that the transactional-execution facility is installed" +>; + +def FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", + "Assume that the processor-assist facility is installed" +>; + def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1]>; 
def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, + FeatureTransactionalExecution, FeatureProcessorAssist]>; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 31a2bff..de725ae 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -10,7 +10,6 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Host.h" using namespace llvm; @@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; -#if defined(__linux__) && defined(__s390x__) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Parse features string. ParseSubtargetFeatures(CPUName, FS); return *this; @@ -43,7 +38,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasFastSerialization(false), HasInterlockedAccess1(false), + HasPopulationCount(false), HasFastSerialization(false), + HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasTransactionalExecution(false), HasProcessorAssist(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {} diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 99cb1ad..c99e552 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -38,8 +38,12 @@ protected: bool HasLoadStoreOnCond; bool HasHighWord; bool HasFPExtension; + bool HasPopulationCount; bool HasFastSerialization; bool HasInterlockedAccess1; + bool HasMiscellaneousExtensions; + bool HasTransactionalExecution; + bool HasProcessorAssist; private: Triple TargetTriple; @@ -86,12 +90,26 @@ public: // Return true if the target has the floating-point extension facility. bool hasFPExtension() const { return HasFPExtension; } + // Return true if the target has the population-count facility. + bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } // Return true if the target has interlocked-access facility 1. bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + // Return true if the target has the miscellaneous-extensions facility. + bool hasMiscellaneousExtensions() const { + return HasMiscellaneousExtensions; + } + + // Return true if the target has the transactional-execution facility. + bool hasTransactionalExecution() const { return HasTransactionalExecution; } + + // Return true if the target has the processor-assist facility. + bool hasProcessorAssist() const { return HasProcessorAssist; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 86baccb..b2f8175 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -108,3 +109,9 @@ void SystemZPassConfig::addPreEmitPass() { TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(this, PM); } + +TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); + }); +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 181b926..5ded07c 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -39,6 +39,7 @@ public: } // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 0000000..3337f63 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,240 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. +// +//===----------------------------------------------------------------------===// + +unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. 
+    if (isInt<32>(Imm.getSExtValue()))
+      return TTI::TCC_Basic;
+    // Constants loaded via llilf.
+    if (isUInt<32>(Imm.getZExtValue()))
+      return TTI::TCC_Basic;
+    // Constants loaded via llihf:
+    if ((Imm.getZExtValue() & 0xffffffff) == 0)
+      return TTI::TCC_Basic;
+
+    return 2 * TTI::TCC_Basic;
+  }
+
+  return 4 * TTI::TCC_Basic;
+}
+
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+                                       const APInt &Imm, Type *Ty) {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
+  // here, so that constant hoisting will ignore this constant.
+  if (BitSize == 0)
+    return TTI::TCC_Free;
+  // No cost model for operations on integers larger than 64 bit implemented yet.
+  if (BitSize > 64)
+    return TTI::TCC_Free;
+
+  switch (Opcode) {
+  default:
+    return TTI::TCC_Free;
+  case Instruction::GetElementPtr:
+    // Always hoist the base address of a GetElementPtr. This prevents the
+    // creation of new constants for every base constant that gets constant
+    // folded with the offset.
+    if (Idx == 0)
+      return 2 * TTI::TCC_Basic;
+    return TTI::TCC_Free;
+  case Instruction::Store:
+    if (Idx == 0 && Imm.getBitWidth() <= 64) {
+      // Any 8-bit immediate store can be implemented via mvi.
+      if (BitSize == 8)
+        return TTI::TCC_Free;
+      // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+      if (isInt<16>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::ICmp:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Comparisons against signed 32-bit immediates implemented via cgfi.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+      // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Or their negation, by swapping addition vs. subtraction.
+      if (isUInt<32>(-Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Mul:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use msgfi to multiply by 32-bit signed immediates.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Or:
+  case Instruction::Xor:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Masks supported by oilf/xilf.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Masks supported by oihf/xihf.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0)
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::And:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Any 32-bit AND operation can be implemented via nilf.
+      if (BitSize <= 32)
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilf.
+      if (isUInt<32>(~Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilh.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+        return TTI::TCC_Free;
+      // Some 64-bit AND operations can be implemented via risbg.
+      const SystemZInstrInfo *TII = ST->getInstrInfo();
+      unsigned Start, End;
+      if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1) + return TTI::TCC_Free; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 0000000..d498913 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + SystemZTTIImpl(const SystemZTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + SystemZTTIImpl(SystemZTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 75100fb..db543f3 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( NameStr += DL->getPrivateGlobalPrefix(); TM.getNameWithPrefix(NameStr, GV, Mang); NameStr.append(Suffix.begin(), Suffix.end()); - return Ctx->GetOrCreateSymbol(NameStr.str()); + return Ctx->GetOrCreateSymbol(NameStr); } MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol( diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dd07f81..5807cf7 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -187,5 +187,5 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { SmallString<60> NameStr; getNameWithPrefix(NameStr, GV, Mang); const TargetLoweringObjectFile *TLOF = getObjFileLowering(); - return TLOF->getContext().GetOrCreateSymbol(NameStr.str()); + return TLOF->getContext().GetOrCreateSymbol(NameStr); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 236cb1b..1a5bf16 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -183,7 +183,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, } static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, - formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) { + raw_pwrite_stream &OS, + LLVMCodeGenFileType codegen, + char **ErrorMessage) { 
TargetMachine* TM = unwrap(T); Module* Mod = unwrap(M); @@ -229,8 +231,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, *ErrorMessage = strdup(EC.message().c_str()); return true; } - formatted_raw_ostream destf(dest); - bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage); + bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage); dest.flush(); return Result; } @@ -238,15 +239,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage, LLVMMemoryBufferRef *OutMemBuf) { - std::string CodeString; - raw_string_ostream OStream(CodeString); - formatted_raw_ostream Out(OStream); - bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage); + SmallString<0> CodeString; + raw_svector_ostream OStream(CodeString); + bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage); OStream.flush(); - std::string &Data = OStream.str(); - *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(), - Data.length(), ""); + StringRef Data = OStream.str(); + *OutMemBuf = + LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), ""); return Result; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c24805a..93c6ea0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2571,7 +2571,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, SmallString<16> Tmp; Tmp += Base; Tmp += ' '; - Op.setTokenValue(Tmp.str()); + Op.setTokenValue(Tmp); // If this instruction starts with an 'f', then it is a floating point stack // instruction. These come in up to three forms for 32-bit, 64-bit, and diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 65461af..f265f1d 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -33,15 +33,12 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -void X86ATTInstPrinter::printRegName(raw_ostream &OS, - unsigned RegNo) const { - OS << markup("<reg:") - << '%' << getRegisterName(RegNo) - << markup(">"); +void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">"); } void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // InstrInfo.td as soon as Requires clause is supported properly // for InstAlias. if (MI->getOpcode() == X86::CALLpcrel32 && - (getAvailableFeatures() & X86::Mode64Bit) != 0) { + (STI.getFeatureBits() & X86::Mode64Bit) != 0) { OS << "\tcallq\t"; printPCRelImm(MI, 0, OS); } @@ -169,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printRegName(O, Op.getReg()); } else if (Op.isImm()) { // Print X86 immediates as signed values. 
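Earlier in this chunk, LLVMTargetMachineEmitToMemoryBuffer switches from a raw_string_ostream wrapped in a formatted_raw_ostream to a SmallString-backed raw_svector_ostream, so the emitted bytes are handed back without an intermediate std::string copy. A minimal sketch of that idiom, assuming only the LLVM support headers (not part of the patch):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    // str() returns a StringRef that points into Buf itself, so the caller
    // can copy the bytes out exactly once, as the C API binding above does.
    llvm::StringRef emitDemo(llvm::SmallString<0> &Buf) {
      llvm::raw_svector_ostream OS(Buf);
      OS << "object bytes";
      return OS.str();
    }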
- O << markup("<imm:") - << '$' << formatImm((int64_t)Op.getImm()) + O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm()) << markup(">"); // If there are no instruction-specific comments, add a comment clarifying @@ -182,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("<imm:") - << '$' << *Op.getExpr() - << markup(">"); + O << markup("<imm:") << '$' << *Op.getExpr() << markup(">"); } } void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); - const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); - const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp); - const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg); + const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg); + const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg); + const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp); + const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+X86::AddrSegmentReg, O); + printOperand(MI, Op + X86::AddrSegmentReg, O); O << ':'; } @@ -215,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, if (IndexReg.getReg() || BaseReg.getReg()) { O << '('; if (BaseReg.getReg()) - printOperand(MI, Op+X86::AddrBaseReg, O); + printOperand(MI, Op + X86::AddrBaseReg, O); if (IndexReg.getReg()) { O << ','; - printOperand(MI, Op+X86::AddrIndexReg, O); - unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); + printOperand(MI, Op + X86::AddrIndexReg, O); + unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm(); if (ScaleVal != 1) { - O << ',' - << markup("<imm:") - << ScaleVal // never printed in hex. + O << ',' << markup("<imm:") << ScaleVal // never printed in hex. << markup(">"); } } @@ -236,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -267,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &DispSpec = MI->getOperand(Op); - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. 
if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -289,7 +281,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O) { - O << markup("<imm:") - << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) + O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) << markup(">"); } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index f71cb81..62b6b73 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -24,14 +24,12 @@ class MCOperand; class X86ATTInstPrinter final : public MCInstPrinter { public: X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); - } + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -142,7 +140,6 @@ public: private: bool HasCustomInstComment; }; - } #endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 91d1828..4d92daf 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 2150144..6e371da 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -28,7 +28,8 @@ public: : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
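The printer changes in these headers share one theme: printInst now receives the MCSubtargetInfo with every call instead of the printer caching feature bits at construction, so a single MCInstPrinter instance can serve functions built for different subtargets (the CALLpcrel32 callq alias above already reads STI.getFeatureBits()). A hedged sketch of a call site under the new signature:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"

    // Feature-dependent aliases are resolved from the STI passed per
    // instruction, not from state frozen when the printer was created.
    void printOne(llvm::MCInstPrinter &IP, const llvm::MCInst &Inst,
                  llvm::raw_ostream &OS, const llvm::MCSubtargetInfo &STI) {
      IP.printInst(&Inst, OS, /*Annot=*/"", STI);
    }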
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a400d46..b84c983 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -360,7 +360,7 @@ public: ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); } }; @@ -370,7 +370,7 @@ public: ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_X86_64); } @@ -381,7 +381,7 @@ public: ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); } }; @@ -395,7 +395,7 @@ public: , Is64Bit(is64Bit) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86WinCOFFObjectWriter(OS, Is64Bit); } }; @@ -752,7 +752,7 @@ public: StringRef CPU) : DarwinX86AsmBackend(T, MRI, CPU, false) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL); @@ -772,7 +772,7 @@ public: StringRef CPU, MachO::CPUSubTypeX86 st) : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, Subtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 76a9d2b..4508883 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -22,7 +22,8 @@ namespace { public: X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine); - virtual ~X86ELFObjectWriter(); + ~X86ELFObjectWriter() override; + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -248,9 +249,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, return getRelocType32(Modifier, getType32(Type), IsPCRel); } -MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, +MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS, + bool IsELF64, uint8_t OSABI, uint16_t EMachine) { MCELFObjectTargetWriter *MOTW = new X86ELFObjectWriter(IsELF64, OSABI, EMachine); diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9b98a3e..e27b7cb 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ 
b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -39,7 +39,7 @@ public: : MCII(mcii), Ctx(ctx) { } - ~X86MCCodeEmitter() {} + ~X86MCCodeEmitter() override {} bool is64BitMode(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & X86::Mode64Bit) != 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 0946326..5bdd844 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -80,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = X86_MC::ParseX86Triple(TT); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -207,14 +207,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createX86MCInstPrinter(const Target &T, +static MCInstPrinter *createX86MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI, MII, MRI, STI); + return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) return new X86IntelInstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 6f50f11..dcdae1d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -31,10 +31,11 @@ class Target; class Triple; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheX86_32Target, TheX86_64Target; -/// DWARFFlavour - Flavour of dwarf regnumbers +/// Flavour of dwarf regnumbers /// namespace DWARFFlavour { enum { @@ -42,7 +43,7 @@ namespace DWARFFlavour { }; } -/// N86 namespace - Native X86 register numbers +/// Native X86 register numbers /// namespace N86 { enum { @@ -57,9 +58,8 @@ namespace X86_MC { void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); - /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -78,27 +78,25 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, /// /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll); -/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. -MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createX86ELFObjectWriter - Construct an X86 ELF object writer. -MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, - uint16_t EMachine); -/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. 
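Every createX86*ObjectWriter factory in this header now takes raw_pwrite_stream rather than raw_ostream. The stronger type matters because object writers seek back and patch headers once section sizes are known, and pwrite expresses exactly that. A small sketch of the capability (helper name assumed, not from the patch):

    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    #include <cstring>

    // Overwrite a 32-bit field at an offset that was already written; a
    // plain raw_ostream has no way to express this back-patching.
    void patchWord(llvm::raw_pwrite_stream &OS, uint64_t Offset, uint32_t Value) {
      char Bytes[4];
      std::memcpy(Bytes, &Value, sizeof(Bytes)); // host byte order; a real
      OS.pwrite(Bytes, sizeof(Bytes), Offset);   // writer encodes endianness
    }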
-MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an X86 ELF object writer. +MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64, + uint8_t OSABI, uint16_t EMachine); +/// Construct an X86 Win COFF object writer. +MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info. +/// Construct X86-64 Mach-O relocation info. MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); -/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info. +/// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 7a83f4c..38539cd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -575,9 +575,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index e1df5c2..bd1bc99 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -25,7 +25,7 @@ namespace { class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: X86WinCOFFObjectWriter(bool Is64Bit); - virtual ~X86WinCOFFObjectWriter(); + ~X86WinCOFFObjectWriter() override; unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -90,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, llvm_unreachable("Unsupported COFF machine type."); } -MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 5690efe..92f42b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer { Win64EH::UnwindEmitter EHStreamer; public: X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, *CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, *CE, OS) {} void EmitWinEHHandlerData() override; void EmitWindowsUnwindTables() override; @@ -49,8 +49,8 @@ void X86WinCOFFStreamer::FinishImpl() { } MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *CE, bool RelaxAll) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); return S; diff --git a/lib/Target/X86/X86.td 
b/lib/Target/X86/X86.td index 4f9836d..d13f155 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -273,17 +273,15 @@ def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 -class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures> - : ProcessorModel<Name, SandyBridgeModel, !listconcat([ - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeatureFastUAMem, - FeaturePOPCNT - ], - AdditionalFeatures)>; -def : NehalemProc<"nehalem", []>; -def : NehalemProc<"corei7", [FeatureAES]>; +class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureFastUAMem, + FeaturePOPCNT + ]>; +def : NehalemProc<"nehalem">; +def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f6033a7..2ed4975 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // must be registered in .sxdata. Use of any unregistered handlers will // cause the process to terminate immediately. LLVM does not know how to // register any SEH handlers, so its object files should be safe. - S->setAbsolute(); OutStreamer.EmitSymbolAttribute(S, MCSA_Global); OutStreamer.EmitAssignment( S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); @@ -723,28 +722,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (const auto &Stub : Stubs) { - OutStreamer.EmitLabel(Stub.first); - OutStreamer.EmitSymbolValue(Stub.second.getPointer(), - TD->getPointerSize()); - } - Stubs.clear(); - } - + if (TT.isOSBinFormatELF()) SM.serializeToStackMapSection(); - } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cba140f..cdf10a7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2417,6 +2417,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. 
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) .addImm(0) .addMetadata(DI->getVariable()) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fb12ce5..5da7acf 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; - unsigned ShlOp, Op; + unsigned ShlOp, AddOp, Op; MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2208,6 +2208,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: assert(CstVT == MVT::i8); ShlOp = X86::SHL32ri; + AddOp = X86::ADD32rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2219,6 +2220,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: assert(CstVT == MVT::i8 || CstVT == MVT::i32); ShlOp = X86::SHL64ri; + AddOp = X86::ADD64rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2232,6 +2234,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit the smaller op and the shift. SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + if (ShlVal == 1) + return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + SDValue(New, 0)); return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), getI8Imm(ShlVal)); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8b92e70..c32412a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && @@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } + MachineModuleInfo &MMI = MF.getMMI(); + const Function *WinEHParent = nullptr; + if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + WinEHParent = MMI.getWinEHParent(Fn); + bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; + bool IsWinEHParent = WinEHParent && WinEHParent == Fn; + // Figure out if XMM registers are in use. assert(!(MF.getTarget().Options.UseSoftFloat && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } if (IsWin64) { - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); // Get to the caller-allocated home save location. Add 8 to account // for the return address. 
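In the X86ISelDAGToDAG hunk above, a shrunken constant whose reconstructed shift amount works out to 1 is now selected as ADD32rr/ADD64rr rather than a shift. The identity it leans on, as a one-line sketch (on many x86 cores a register-register add issues on more execution ports than a shift):

    #include <cstdint>

    // x << 1 == x + x; the selector re-emits the shift-by-one as an add.
    uint64_t shlByOne(uint64_t X) { return X + X; }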
int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + } else if (IsWinEHOutlined) { + // Get to the caller-allocated home save location. Add 8 to account + // for the return address. + int HomeOffset = TFI.getOffsetOfLocalArea() + 8; + FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( + /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); + + MMI.getWinEHFuncInfo(Fn) + .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] = + FuncInfo->getRegSaveFrameIndex(); + + // Store the second integer parameter (rdx) into rsp+16 relative to the + // stack pointer at the entry of the function. + SDValue RSFIN = + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, RSFIN, + MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), + /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); + if (IsWinEHParent) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } + return Chain; } @@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, SDLoc dl(Op); SDValue V; bool First = true; + + // SSE4.1 - use PINSRB to insert each byte directly. + if (Subtarget->hasSSE41()) { + for (unsigned i = 0; i < 16; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else + V = DAG.getUNDEF(MVT::v16i8); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + MVT::v16i8, V, Op.getOperand(i), + DAG.getIntPtrConstant(i)); + } + } + + return V; + } + + // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { @@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } + // We can't directly insert an i8 or i16 into a vector, so zero extend + // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); if (VT.is256BitVector()) { - SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + if (Subtarget->hasAVX()) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } else { + // Without AVX, we need to extend to a 128-bit vector and then + // insert into the 256-bit vector. 
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); - if(ResVT.is256BitVector()) + if (ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { @@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); - if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); - if (NumV2Elements == 1 && Mask[0] >= 8) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumElts = VT.getVectorNumElements(); + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { + return M >= NumElts; + }); + + if (NumV2Elements == 1 && Mask[0] >= NumElts) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, VT, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + // There is a really nice hard cut-over between AVX1 and AVX2 that means we can // check for those subtargets here and avoid much of the subtarget querying in // the per-vector-type lowering routines. 
With AVX1 we have essentially *zero* @@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, // Now we have only mask extension assert(InVT.getVectorElementType() == MVT::i1); SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, return SDValue(); } +/// If we have at least two divisions that use the same divisor, convert to +/// multiplication by a reciprocal. This may need to be adjusted for a given +/// CPU if a division's cost is not at least twice the cost of a multiplication. +/// This is because we still need one division to calculate the reciprocal and +/// then we need two multiplies by that reciprocal as replacements for the +/// original divisions. +bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + return NumUsers > 1; +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); return C && C->isAllOnesValue(); } @@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, } case PREFETCH: { SDValue Hint = Op.getOperand(6); - unsigned HintVal; - if (dyn_cast<ConstantSDNode> (Hint) == nullptr || - (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1) - llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue(); + assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); @@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight break; case 'G': case 'C': - if (dyn_cast<ConstantFP>(CallOperandVal)) { + if (isa<ConstantFP>(CallOperandVal)) { weight = CW_Constant; } break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dd20ec2..5130c37 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1072,6 +1072,9 @@ namespace llvm { /// Use rcp* to speed up fdiv calculations. SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; + + /// Reassociate floating point divisions into multiply by reciprocal.
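combineRepeatedFPDivisors returns true only for more than one user because the rewrite still pays for one division to form the reciprocal; with a single divide there is nothing to amortize. At the source level the transform looks like this sketch (relaxed FP semantics assumed, since A * (1/B) need not be bit-identical to A / B):

    // Two divisions by the same B become one divide plus two multiplies.
    void scaleBoth(double A, double C, double B, double &X, double &Y) {
      double R = 1.0 / B; // the single remaining division
      X = A * R;          // was A / B
      Y = C * R;          // was C / B
    }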
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override; }; namespace X86 { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 509602f..0959162 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, itins, HasBWI, IsCommutable>; } -multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT, - ValueType SrcVT, RegisterClass KRC, RegisterClass RC, - PatFrag memop_frag, X86MemOperand x86memop, - PatFrag scalar_mfrag, X86MemOperand x86scalar_mop, - string BrdcstStr, OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - { - def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_K; - def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" , - "|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_KZ; - } +multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst, bit IsCommutable = 0> { + defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, + "$src2, $src1","$src1, $src2", + (_Dst.VT (OpNode + (_Src.VT _Src.RC:$src1), + (_Src.VT _Src.RC:$src2))), + "",itins.rr, IsCommutable>, + AVX512BIBase, EVEX_4V; let mayLoad = 1 in { - def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_K; - def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_KZ; - def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B; - def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ; + defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), + (bitconvert (_Src.LdFrag addr:$src2)))), + "", itins.rm>, + AVX512BIBase, EVEX_4V; + + defm rmb : 
AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2), + OpcodeStr, + "${src2}"##_Dst.BroadcastStr##", $src1", + "$src1, ${src2}"##_Dst.BroadcastStr, + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32 + (_Dst.VT (X86VBroadcast + (_Dst.ScalarLdFrag addr:$src2)))))), + "", itins.rm>, + AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P, + X86pmuldq, v16i32_info, v8i64_info, 1>, + T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULDQZrr VR512:$src1, VR512:$src2)>; +defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + X86pmuludq, v16i32_info, v8i64_info, 1>, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; @@ -3208,7 +3173,7 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_INTALU_ITINS_P, HasAVX512, 1>; defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, - SSE_INTALU_ITINS_P, HasAVX512, 1>; + SSE_INTALU_ITINS_P, HasAVX512, 0>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -3743,16 +3708,19 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), - OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), - (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, + OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), + !strconcat("$src2, ${src3}", _.BroadcastStr ), + (OpNode _.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } } // Constraints = "$src1 = $dst" let Constraints = "$src1 = $dst" in { // Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. 
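The avx512_binop_rm2 rewrite above collapses nine hand-written defs (rr/rrk/rrkz, rm/rmk/rmkz, rmb/rmbk/rmbkz) into AVX512_maskable invocations, which expand the plain, merge-masked {k}, and zero-masked {k}{z} encodings from one description. The user-visible shape of those three variants, sketched with AVX-512F intrinsics (illustrative only; compile with -mavx512f):

    #include <immintrin.h>

    __m512i addForms(__m512i A, __m512i B, __m512i Src, __mmask16 K) {
      __m512i Plain = _mm512_add_epi32(A, B);              // vpaddd
      __m512i Merge = _mm512_mask_add_epi32(Src, K, A, B); // vpaddd ... {k}
      __m512i Zero  = _mm512_maskz_add_epi32(K, A, B);     // vpaddd ... {k}{z}
      return _mm512_xor_si512(Plain, _mm512_xor_si512(Merge, Zero));
    }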
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode> { +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, + X86VectorVTInfo _, + SDPatternOperator OpNode> { defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", @@ -3772,7 +3740,6 @@ multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231, SDPatternOperator OpNode> { defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix), VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>; - defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix), VTI>, EVEX_CD8<VTI.EltSize, CD8VF>; } @@ -3794,12 +3761,14 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr, v8f64_info, OpNode>, - avx512_fma3_round_forms<opc213, OpcodeStr, - v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W; + avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info, + OpNodeRnd>, EVEX_V512, VEX_W; defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v4f64x_info, OpNode>, EVEX_V256, VEX_W; + v4f64x_info, OpNode>, + EVEX_V256, VEX_W; defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v2f64x_info, OpNode>, EVEX_V128, VEX_W; + v2f64x_info, OpNode>, + EVEX_V128, VEX_W; } } @@ -3830,26 +3799,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode, } } // Constraints = "$src1 = $dst" - -multiclass avx512_fma3p_m132_f<bits<8> opc, - string OpcodeStr, - SDNode OpNode> { +multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> { let ExeDomain = SSEPackedSingle in { defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; + OpNode,v16f32_info>, EVEX_V512, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; + OpNode, v8f32x_info>, EVEX_V256, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; + OpNode, v4f32x_info>, EVEX_V128, + EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v8f64_info>, EVEX_V512, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v4f64x_info>, EVEX_V256, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v2f64x_info>, EVEX_V128, + VEX_W, EVEX_CD8<32, CD8VF>; } } @@ -3860,7 +3832,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; - // Scalar FMA let Constraints = "$src1 = $dst" in { multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3883,7 +3854,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpVT (OpNode RC:$src2, RC:$src1, (mem_frag addr:$src3))))]>; } - } // Constraints = "$src1 = $dst" defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, @@ -3920,6 +3890,7 @@ let hasSideEffects = 0 in { EVEX_4V; } 
// hasSideEffects = 0 } + let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 78efc4d..5e19ad4 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), let isCompare = 1 in { let Defs = [EFLAGS] in { let isCommutable = 1 in { - def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; - def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; - def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>; - def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>; + def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>; + def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>; + def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>; + def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>; } // isCommutable def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 18bbe5d..45e6d0a 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1232,7 +1232,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. +// This can also reduce instruction size by eliminating the need for the REX +// prefix. +// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. +let AddedComplexity = 1 in { def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (SUBREG_TO_REG (i64 0), @@ -1248,8 +1252,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; +} // AddedComplexity = 1 +// AddedComplexity is needed due to the increased complexity on the +// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all +// the MOVZX patterns keeps them together in DAGISel tables. +let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; @@ -1272,6 +1281,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz +let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), @@ -1290,6 +1300,7 @@ def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>, Requires<[In64BitMode]>; +} // AddedComplexity = 1 // sext_inreg patterns diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 0bdabdf..b75a9f4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -631,53 +631,53 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 16; return false; }]>; def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 32; return false; }]>; def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 64; return false; }]>; def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedLoadSDNode>(N) != 0); + return isa<MaskedLoadSDNode>(N); }]>; def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 16; return false; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 32; return false; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 64; return false; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedStoreSDNode>(N) != 0); + return isa<MaskedStoreSDNode>(N); }]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 538ec1c..fbfd868 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -559,6 +559,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, + // 3DNow! 
version of foldable instructions + { X86::PF2IDrr, X86::PF2IDrm, 0 }, + { X86::PF2IWrr, X86::PF2IWrm, 0 }, + { X86::PFRCPrr, X86::PFRCPrm, 0 }, + { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, + { X86::PI2FDrr, X86::PI2FDrm, 0 }, + { X86::PI2FWrr, X86::PI2FWrm, 0 }, + { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, @@ -943,6 +952,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, { X86::DIVSDrr, X86::DIVSDrm, 0 }, @@ -1201,6 +1212,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, + // 3DNow! version of foldable instructions + { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, + { X86::PFACCrr, X86::PFACCrm, 0 }, + { X86::PFADDrr, X86::PFADDrm, 0 }, + { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, + { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, + { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, + { X86::PFMAXrr, X86::PFMAXrm, 0 }, + { X86::PFMINrr, X86::PFMINrm, 0 }, + { X86::PFMULrr, X86::PFMULrm, 0 }, + { X86::PFNACCrr, X86::PFNACCrm, 0 }, + { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, + { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, + { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, + { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, + { X86::PFSUBrr, X86::PFSUBrm, 0 }, + { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, + { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, @@ -5969,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, + { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr }, { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, @@ -5984,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + // TODO: Add the AVX versions of MOVLPSmr { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ccdbf0e..65b155c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -643,9 +643,6 @@ let Predicates = [UseAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types - def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; @@ -653,9 +650,6 @@ let Predicates = [UseAVX] in { (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } - 
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in { (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem - // is during lowering, where it's not possible to recognize the fold cause + // is during lowering, where it's not possible to recognize the fold because // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), @@ -3678,13 +3672,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), PS, Requires<[HasSSE2]>; } // SchedRW = [WriteStore] +let Predicates = [HasAVX2, NoVLX] in { + def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; +} + let Predicates = [HasAVX, NoVLX] in { def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (VMOVNTPSmr addr:$dst, VR128:$src)>; + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; } def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (MOVNTPSmr addr:$dst, VR128:$src)>; + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; } // AddedComplexity @@ -4890,7 +4901,8 @@ let Predicates = [UseAVX] in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; - // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. + // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. + // These instructions also write zeros in the high part of a 256-bit register. let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; @@ -4898,6 +4910,9 @@ let Predicates = [UseAVX] in { (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (VMOVDI2PDIrm addr:$src)>; + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>; } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
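Earlier in this file's diff, the alignednontemporalstore patterns for v4i32/v8i16/v16i8 switch from the FP-domain MOVNTPS to MOVNTDQ (plus the new VMOVNTDQ/VMOVNTDQY patterns), keeping integer streaming stores in the integer domain. The user-level analogue, assuming the usual intrinsic-to-instruction mapping:

    #include <emmintrin.h>

    // An integer nontemporal store; this intrinsic lowers to movntdq,
    // the integer-domain form the new patterns select.
    void streamStore(__m128i *Dst, __m128i V) {
      _mm_stream_si128(Dst, V);
    }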
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, @@ -5016,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>; } let Predicates = [UseSSE2], AddedComplexity = 20 in { @@ -7150,6 +7168,10 @@ let Predicates = [HasAVX2] in { } // Patterns +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseAVX] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a @@ -7166,8 +7188,10 @@ let Predicates = [UseAVX] in { // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>; - def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), - (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), + (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, @@ -7181,14 +7205,19 @@ let Predicates = [UseAVX] in { (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; - // Move low f64 and clear high bits. - def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), - (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; - + // These will incur an FP/int domain crossing penalty, but it may be the only + // way without AVX2. Do not add any complexity because we may be able to match + // more optimal patterns defined earlier in this file. + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), + (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>; } +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseSSE41] in { // With SSE41 we can use blends for these patterns. def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), @@ -8341,7 +8370,7 @@ let Predicates = [HasAVX2] in { def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), (VBROADCASTSDYrr VR128:$src)>; - // Provide aliases for broadcast from the same regitser class that + // Provide aliases for broadcast from the same register class that // automatically does the extract. 
def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 42256b2..28a3b7b 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -334,6 +334,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), + X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), @@ -358,6 +362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), @@ -376,6 +386,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ca8fc9c..4bfc7f9 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -193,7 +193,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); + Align, isVolatile, false, + DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 
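Both X86SelectionDAGInfo hunks (the memset one above and the memcpy one below), together with the XCore memmove hunk further down, thread one additional boolean through SelectionDAG's memory-operation builders; the XCore site computes it with isInTailCallPosition, so it reads as a tail-call flag. A minimal sketch of the widened memcpy call, assuming only the argument order visible in these hunks (the helper and its name are illustrative, not part of the patch):

// Illustrative only: the new flag slots in between AlwaysInline and the
// MachinePointerInfo arguments; the X86 hunks pass a constant false, the
// XCore hunk passes a computed isTail.
static SDValue emitPlainMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
                               SDValue Dst, SDValue Src, SDValue Size,
                               unsigned Align,
                               MachinePointerInfo DstPtrInfo,
                               MachinePointerInfo SrcPtrInfo) {
  return DAG.getMemcpy(Chain, dl, Dst, Src, Size, Align,
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       /*isTailCall=*/false, DstPtrInfo, SrcPtrInfo);
}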
@@ -282,7 +283,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, + Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 215fe89..36b3b02 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 78521fd..6fd2dec 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -32,7 +32,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; private: void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index d0a09b2..4a790c8 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -81,12 +81,11 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createXCoreMCInstPrinter(const Target &T, +static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new XCoreInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6e8a95a..c4e3bb8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -308,7 +308,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty); Ty = Type::getInt32Ty(*DAG.getContext()); Constant *Idx = ConstantInt::get(Ty, Offset); - Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx); + Constant *GAI = ConstantExpr::getGetElementPtr( + Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, MachinePointerInfo(), false, false, false, 0); @@ -1422,7 +1423,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, MVT::i32), - Align, false, false, + Align, false, false, false, MachinePointerInfo(), MachinePointerInfo())); } else { @@ -1833,10 +1834,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, LD->getAlignment() == Alignment && !LD->isVolatile() && !LD->isIndexed() && 
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, false, ST->getPointerInfo(), + Alignment, false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index b4c6a50..9fb63e9 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) { case Instruction::GetElementPtr: { SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end()); ArrayRef<Value *> CEOps(CEOpVec); - return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0], - CEOps.slice(1))); + return dyn_cast<Instruction>(Builder.CreateInBoundsGEP( + cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0], + CEOps.slice(1))); } case Instruction::Add: case Instruction::Sub: @@ -212,7 +213,8 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { SmallVector<Value *, 2> Indices; Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx))); Indices.push_back(ThreadID); - Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices); + Value *Addr = + Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices); U->replaceUsesOfWith(GV, Addr); } diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h index 48bf0fa..3563dbc 100644 --- a/lib/Target/XCore/XCoreTargetStreamer.h +++ b/lib/Target/XCore/XCoreTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class XCoreTargetStreamer : public MCTargetStreamer { public: XCoreTargetStreamer(MCStreamer &S); - virtual ~XCoreTargetStreamer(); + ~XCoreTargetStreamer() override; virtual void emitCCTopData(StringRef Name) = 0; virtual void emitCCTopFunction(StringRef Name) = 0; virtual void emitCCBottomData(StringRef Name) = 0; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 46480bd..56975ea 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -207,6 +207,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return nullptr; + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) @@ -227,12 +234,6 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { isSelfRecursive = true; } - // Don't promote arguments for variadic functions. Adding, removing, or - // changing non-pack parameters can change the classification of pack - // parameters. Frontends encode that classification at the call site in the - // IR, while in the callee the classification is determined dynamically based - // on the number of registers consumed so far. 
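The comment being hoisted to the top of PromoteArguments here explains why the pass must bail on variadic functions. A standalone C++ illustration of the hazard, not from the patch (the x86-64 SysV register-passing detail is stated only as background):

#include <cstdarg>

// Illustration only: va_arg's view of the variadic arguments is anchored to
// how many registers the named parameters consumed. If ArgPromotion rewrote
// the named list (say, split 'p' into two loaded ints), the callee's idea of
// where '...' starts would no longer match what callers emitted.
struct Pair { int a, b; };

static int sumRest(Pair *p, int n, ...) {
  va_list ap;
  va_start(ap, n); // classification depends on the named parameter list
  int s = p->a + p->b;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);
  va_end(ap);
  return s;
}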
- if (F->isVarArg()) return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, @@ -674,8 +675,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back( - GetElementPtrInst::getIndexedType(I->getType(), SI->second)); + Params.push_back(GetElementPtrInst::getIndexedType( + cast<PointerType>(I->getType()->getScalarType())->getElementType(), + SI->second)); assert(Params.back()); } @@ -704,7 +706,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on subsequent argument // promotions of the same function. FunctionDIs.erase(DI); @@ -860,7 +862,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->replaceCallEdge(Call, New, NF_CGN); + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4431311..3be23d5 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -73,8 +73,8 @@ namespace { } std::string getDescription() const { - return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getName().str(); + return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) + + " of function " + F->getName()).str(); } }; @@ -304,7 +304,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { auto DI = FunctionDIs.find(&Fn); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on non-varargs argument // eliminations of the same function. FunctionDIs.erase(DI); @@ -482,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U, return Result; } - if (ImmutableCallSite CS = V) { + if (auto CS = ImmutableCallSite(V)) { const Function *F = CS.getCalledFunction(); if (F) { // Used in a direct call. @@ -1092,7 +1092,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(NF); + DI->second->replaceFunction(NF); // Now that the old function is dead, delete it. F->eraseFromParent(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 20b41fb..b8c4f5d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -564,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. Value *NewPtr = NewGlobals[Val]; + Type *NewTy = NewGlobals[Val]->getType(); // Form a shorter GEP if needed. 
if (GEP->getNumOperands() > 3) { @@ -572,7 +573,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); + NewPtr = + ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); + NewTy = GetElementPtrInst::getIndexedType(NewTy, Idxs); } else { GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); SmallVector<Value*, 8> Idxs; @@ -721,8 +724,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { else break; if (Idxs.size() == GEPI->getNumOperands()-1) - Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, Idxs)); + Changed |= OptimizeAwayTrappingUsesOfValue( + GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -2338,7 +2341,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; - Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) Ptr = ConstantFoldConstantExpression(CE, DL, TLI); @@ -2402,8 +2405,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, i != e; ++i) GEPOps.push_back(getVal(*i)); InstResult = - ConstantExpr::getGetElementPtr(P, GEPOps, - cast<GEPOperator>(GEP)->isInBounds()); + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index fe00d92..f3f8529 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -349,7 +349,8 @@ void LowerBitSets::allocateByteArrays() { Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( + ByteArrayConst->getType(), ByteArray, Idxs); // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the @@ -395,16 +396,17 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, } Constant *ByteArray = BAI->ByteArray; + Type *Ty = BAI->ByteArray->getValueType(); if (!LinkerSubsectionsViaSymbols && AvoidReuse) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. 
- ByteArray = GlobalAlias::create( - BAI->ByteArray->getType()->getElementType(), 0, - GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, + GlobalValue::PrivateLinkage, "bits_use", + ByteArray, M); } - Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset); Value *Byte = B.CreateLoad(ByteAddr); Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); @@ -546,8 +548,8 @@ void LowerBitSets::buildBitSetsFromGlobals( // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, I * 2)}; - Constant *CombinedGlobalElemPtr = - ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d28d563..502451b 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); + static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), cl::Hidden, cl::desc("Run the load combining pass")); @@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (RunFloat2Int) + MPM.add(createFloat2IntPass()); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 816978e..ad7c5a0 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -305,33 +305,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { SmallVector<Metadata *, 64> LiveSubprograms; DenseSet<const MDNode *> VisitedSet; - for (DICompileUnit DIC : F.compile_units()) { - assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); - + for (MDCompileUnit *DIC : F.compile_units()) { // Create our live subprogram list. - DIArray SPs = DIC.getSubprograms(); + MDSubprogramArray SPs = DIC->getSubprograms(); bool SubprogramChange = false; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram DISP(SPs.getElement(i)); - assert(DISP.Verify() && "DISP must verify as a DISubprogram."); + for (unsigned i = 0, e = SPs.size(); i != e; ++i) { + DISubprogram DISP = SPs[i]; // Make sure we visit each subprogram only once. if (!VisitedSet.insert(DISP).second) continue; // If the function referenced by DISP is not null, the function is live. - if (DISP.getFunction()) + if (DISP->getFunction()) LiveSubprograms.push_back(DISP); else SubprogramChange = true; } // Create our live global variable list. 
- DIArray GVs = DIC.getGlobalVariables(); + MDGlobalVariableArray GVs = DIC->getGlobalVariables(); bool GlobalVariableChange = false; - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); - assert(DIG.Verify() && "DIG must verify as DIGlobalVariable."); + for (unsigned i = 0, e = GVs.size(); i != e; ++i) { + DIGlobalVariable DIG = GVs[i]; // Make sure we only visit each global variable only once. if (!VisitedSet.insert(DIG).second) @@ -339,7 +335,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // If the global variable referenced by DIG is not null, the global // variable is live. - if (DIG.getGlobal()) + if (DIG->getVariable()) LiveGlobalVariables.push_back(DIG); else GlobalVariableChange = true; @@ -349,12 +345,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // subprogram list/global variable list with our new live subprogram/global // variable list. if (SubprogramChange) { - DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms))); + DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms)); Changed = true; } if (GlobalVariableChange) { - DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables))); + DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables)); Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 21243c2..56b6cd3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86insertps(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [3:0] - zero mask for each 32-bit lane + // [5:4] - select one 32-bit destination lane + // [7:6] - select one 32-bit source lane + + uint8_t Imm = CInt->getZExtValue(); + uint8_t ZMask = Imm & 0xf; + uint8_t DestLane = (Imm >> 4) & 0x3; + uint8_t SourceLane = (Imm >> 6) & 0x3; + + // If all zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (ZMask == 0xf) + return ZeroVector; + + // TODO: Model this case as two shuffles or a 'logical and' plus shuffle? + if (ZMask) + return nullptr; + + assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); + + // If we're not zeroing anything, this is a single shuffle. + // Replace the selected destination lane with the selected source lane. + // For all other lanes, pass the first source bits through. + int ShuffleMask[4] = { 0, 1, 2, 3 }; + ShuffleMask[DestLane] = SourceLane + 4; + + return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1), + ShuffleMask); + } + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. 
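The new SimplifyX86insertps above turns a constant-immediate insertps into a plain shufflevector; here is a worked decode of one immediate, using exactly the bit fields the function documents (0x60 is an arbitrary example value):

#include <cstdint>

// Imm = 0x60 = 0b01100000: SourceLane = 1 (bits 7:6), DestLane = 2
// (bits 5:4), ZMask = 0 (bits 3:0). Nothing is zeroed, so this is a single
// shuffle: result lane 2 takes lane 1 of the second operand and the other
// lanes pass the first operand through.
int main() {
  uint8_t Imm = 0x60;
  uint8_t ZMask = Imm & 0xf;              // 0x0 -> no lanes zeroed
  uint8_t DestLane = (Imm >> 4) & 0x3;    // 2
  uint8_t SourceLane = (Imm >> 6) & 0x3;  // 1
  int ShuffleMask[4] = {0, 1, 2, 3};
  ShuffleMask[DestLane] = SourceLane + 4; // {0, 1, 5, 3}
  (void)ZMask;
  return ShuffleMask[2] == 5 ? 0 : 1;
}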
static Value *SimplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { - if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { VectorType *VecTy = cast<VectorType>(II.getType()); ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); @@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); - } - // FALL THROUGH uadd into sadd + + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: if (isa<Constant>(II->getArgOperand(0)) && !isa<Constant>(II->getArgOperand(1))) { + // Canonicalize constants into the RHS. Value *LHS = II->getArgOperand(0); II->setArgOperand(0, II->getArgOperand(1)); II->setArgOperand(1, LHS); return II; } + // fall through - // X + undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - - // We can strength reduce reduce this signed add into a regular add if we - // can prove that it will never overflow. - if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); - } - } - - break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - // undef - X -> undef - // X - undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { - // X - 0 -> {X, false} - if (ConstRHS->isZero()) { - return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); - } - } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); - } - } - break; - } - case Intrinsic::umul_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); - } // FALL THROUGH - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. 
- if (isa<Constant>(II->getArgOperand(0)) && - !isa<Constant>(II->getArgOperand(1))) { - Value *LHS = II->getArgOperand(0); - II->setArgOperand(0, II->getArgOperand(1)); - II->setArgOperand(1, LHS); - return II; - } - - // X * undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + OverflowCheckFlavor OCF = + IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID()); + assert(OCF != OCF_INVALID && "unexpected!"); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + Value *OperationResult = nullptr; + Constant *OverflowResult = nullptr; + if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1), + *II, OperationResult, OverflowResult)) + return CreateOverflowTuple(II, OperationResult, OverflowResult); - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); - } - } break; + } + case Intrinsic::minnum: case Intrinsic::maxnum: { Value *Arg0 = II->getArgOperand(0); @@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - + case Intrinsic::x86_sse41_insertps: + if (Value *V = SimplifyX86insertps(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse4a_insertqi: { // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top // ones undef diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index fe544c2..bd79a26 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1450,42 +1450,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { CI.setOperand(0, GEP->getOperand(0)); return &CI; } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - unsigned AS = GEP->getPointerAddressSpace(); - unsigned OffsetBits = DL.getPointerSizeInBits(AS); - APInt Offset(OffsetBits, 0); - BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); - if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { - // FIXME: This is insufficiently tested - just a no-crash test - // (test/Transforms/InstCombine/2007-05-14-Crash.ll) - // - // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = BCI->getOperand(0); - SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), - NewIndices)) { - // FIXME: This codepath is completely untested - could be unreachable - // for all I know. - // If we were able to index down into an element, create the GEP - // and bitcast the result. This eliminates one bitcast, potentially - // two. - Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() - ? 
Builder->CreateInBoundsGEP(OrigBase, NewIndices) - : Builder->CreateGEP( - OrigBase->getType()->getPointerElementType(), - OrigBase, NewIndices); - NGEP->takeName(GEP); - - if (isa<BitCastInst>(CI)) - return new BitCastInst(NGEP, CI.getType()); - assert(isa<PtrToIntInst>(CI)); - return new PtrToIntInst(NGEP, CI.getType()); - } - } } return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 803b50a..223bba0 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2109,33 +2109,112 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } -static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, - InstCombiner &IC) { - // Don't bother doing this transformation for pointers, don't do it for - // vectors. - if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; +bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, + Value *RHS, Instruction &OrigI, + Value *&Result, Constant *&Overflow) { + assert((!OrigI.isCommutative() || + !(isa<Constant>(LHS) && !isa<Constant>(RHS))) && + "call with a constant RHS if possible!"); + + auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) { + Result = OpResult; + Overflow = OverflowVal; + if (ReuseName) + Result->takeName(&OrigI); + return true; + }; - // If the add is a constant expr, then we don't bother transforming it. - Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); - if (!OrigAdd) return nullptr; + switch (OCF) { + case OCF_INVALID: + llvm_unreachable("bad overflow check kind!"); - Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + case OCF_UNSIGNED_ADD: { + OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + true); - // Put the new code above the original add, in case there are any uses of the - // add between the add and the compare. - InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(OrigAdd); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + } + // FALL THROUGH uadd into sadd + case OCF_SIGNED_ADD: { + // X + undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(RHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X + 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(LHS, Builder->getFalse(), false); + + // We can strength reduce this signed add into a regular add if we can prove + // that it will never overflow. 
+ if (OCF == OCF_SIGNED_ADD) + if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + true); + } - Module *M = I.getParent()->getParent()->getParent(); - Type *Ty = LHS->getType(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); - CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); - Value *Add = Builder->CreateExtractValue(Call, 0); + case OCF_UNSIGNED_SUB: + case OCF_SIGNED_SUB: { + // undef - X -> undef + // X - undef -> undef + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X - 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(UndefValue::get(LHS->getType()), Builder->getFalse(), + false); + + if (OCF == OCF_SIGNED_SUB) { + if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + true); + } else { + if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + true); + } + break; + } - IC.ReplaceInstUsesWith(*OrigAdd, Add); + case OCF_UNSIGNED_MUL: { + OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + true); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + } // FALL THROUGH + case OCF_SIGNED_MUL: + // X * undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(RHS)) { + // X * 0 -> {0, false} + if (RHSI->isZero()) + return SetResult(Constant::getNullValue(RHS->getType()), + Builder->getFalse(), false); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) + return SetResult(LHS, Builder->getFalse(), false); + } - // The original icmp gets replaced with the overflow value. - return ExtractValueInst::Create(Call, 1, "uadd.overflow"); + if (OCF == OCF_SIGNED_MUL) + if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + true); + } + + return false; } /// \brief Recognize and process idiom involving test for multiplication @@ -3432,21 +3511,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); } - // (a+b) <u a --> llvm.uadd.with.overflow. - // (a+b) <u b --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && - (Op1 == A || Op1 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) - return R; - - // a >u (a+b) --> llvm.uadd.with.overflow. - // b >u (a+b) --> llvm.uadd.with.overflow. 
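The ad-hoc idiom handlers being deleted around here (and the per-intrinsic cases removed from visitCallInst above) are all funneled into one routine keyed by OverflowCheckFlavor. A condensed, not-verbatim sketch of the new dispatch as it reads inside InstCombiner::visitCallInst:

// Map the *.with.overflow intrinsic to a flavor, try the shared
// simplification, and rebuild the {result, overflow} aggregate only when
// something was actually proved.
OverflowCheckFlavor OCF =
    IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
if (OCF != OCF_INVALID) {
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
                            *II, OperationResult, OverflowResult))
    return CreateOverflowTuple(II, OperationResult, OverflowResult);
}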
- if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op1, m_Add(m_Value(A), m_Value(B))) && - (Op0 == A || Op0 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) - return R; + Instruction *AddI = nullptr; + if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B), + m_Instruction(AddI))) && + isa<IntegerType>(A->getType())) { + Value *Result; + Constant *Overflow; + if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result, + Overflow)) { + ReplaceInstUsesWith(*AddI, Result); + return ReplaceInstUsesWith(I, Overflow); + } + } // (zext a) * (zext b) --> llvm.umul.with.overflow. if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index fb2321d..d0caf34 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -110,6 +110,41 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } + +/// \brief Specific patterns of overflow check idioms that we match. +enum OverflowCheckFlavor { + OCF_UNSIGNED_ADD, + OCF_SIGNED_ADD, + OCF_UNSIGNED_SUB, + OCF_SIGNED_SUB, + OCF_UNSIGNED_MUL, + OCF_SIGNED_MUL, + + OCF_INVALID +}; + +/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op +/// intrinsic. +static inline OverflowCheckFlavor +IntrinsicIDToOverflowCheckFlavor(unsigned ID) { + switch (ID) { + default: + return OCF_INVALID; + case Intrinsic::uadd_with_overflow: + return OCF_UNSIGNED_ADD; + case Intrinsic::sadd_with_overflow: + return OCF_SIGNED_ADD; + case Intrinsic::usub_with_overflow: + return OCF_UNSIGNED_SUB; + case Intrinsic::ssub_with_overflow: + return OCF_SIGNED_SUB; + case Intrinsic::umul_with_overflow: + return OCF_UNSIGNED_MUL; + case Intrinsic::smul_with_overflow: + return OCF_SIGNED_MUL; + } +} + /// \brief An IRBuilder inserter that adds new instructions to the instcombine /// worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter @@ -316,7 +351,7 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; - Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -329,6 +364,17 @@ private: bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); + /// \brief Try to optimize a sequence of instructions checking if an operation + /// on LHS and RHS overflows. + /// + /// If a simplification is possible, stores the simplified result of the + /// operation in OperationResult and result of the overflow check in + /// OverflowResult, and return true. If no simplification is possible, + /// returns false. + bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS, + Instruction &CtxI, Value *&OperationResult, + Constant *&OverflowResult); + Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI); bool transformConstExprCastCall(CallSite CS); @@ -391,14 +437,10 @@ public: } /// Creates a result tuple for an overflow intrinsic \p II with a given - /// \p Result and a constant \p Overflow value. If \p ReUseName is true the - /// \p Result's name is taken from \p II. + /// \p Result and a constant \p Overflow value. 
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result, - bool Overflow, bool ReUseName = true) { - if (ReUseName) - Result->takeName(II); - Constant *V[] = {UndefValue::get(Result->getType()), - Overflow ? Builder->getTrue() : Builder->getFalse()}; + Constant *Overflow) { + Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Result, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6b0f268..d8a559c 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -84,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(&U)) @@ -611,8 +611,10 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, return false; SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); - Type *AllocTy = - GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); + Type *AllocTy = GetElementPtrInst::getIndexedType( + cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType()) + ->getElementType(), + Ops); if (!AllocTy || !AllocTy->isSized()) return false; const DataLayout &DL = IC.getDataLayout(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b6beb65..24446c8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -987,8 +987,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned BegIdx = Mask.front(); VectorType *SrcTy = cast<VectorType>(V->getType()); unsigned VecBitWidth = SrcTy->getBitWidth(); - unsigned SrcElemBitWidth = - SrcTy->getElementType()->getPrimitiveSizeInBits(); + unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); assert(SrcElemBitWidth && "vector elements must have a bitwidth"); unsigned SrcNumElems = SrcTy->getNumElements(); SmallVector<BitCastInst *, 8> BCs; @@ -1000,7 +999,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { BCs.push_back(BC); for (BitCastInst *BC : BCs) { Type *TgtTy = BC->getDestTy(); - unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits(); + unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); if (!TgtElemBitWidth) continue; unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 90551e4..3b46156 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -869,11 +869,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// whether or not there is a sequence of GEP indices into the pointed type that /// will land us at the specified offset. If so, fill them into NewIndices and /// return the resultant element type, otherwise return null. 
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices) { - assert(PtrTy->isPtrOrPtrVectorTy()); - - Type *Ty = PtrTy->getPointerElementType(); + Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; @@ -1611,12 +1609,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); - Value *NewGEP = - GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = GEP.isInBounds() + ? Builder->CreateInBoundsGEP( + nullptr, StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1634,9 +1631,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1669,10 +1666,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, NewIdx, GEP.getName()); + : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1710,9 +1707,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Constant::getNullValue(DL.getIntPtrType(GEP.getType())), NewIdx}; - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); + Value *NewGEP = GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP( + SrcElTy, StrippedPtr, Off, GEP.getName()) + : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1774,9 +1773,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP. SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { - Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); + Value *NGEP = + GEP.isInBounds() + ? 
Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder->CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2268,7 +2268,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder->SetInsertPoint(L->getParent(), L); - Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + Value *GEP = Builder->CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use ReplaceInstUsesWith(). return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -2725,7 +2726,7 @@ bool InstCombiner::run() { DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); - if (!I->getDebugLoc().isUnknown()) + if (I->getDebugLoc()) Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 978c857..8d6d3ce 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -401,12 +401,12 @@ struct AddressSanitizer : public FunctionPass { return SizeInBytes; } /// Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const; + bool isInterestingAlloca(AllocaInst &AI); /// If it is an interesting memory access, return the PointerOperand /// and set IsWrite/Alignment. Otherwise return nullptr. Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const; + unsigned *Alignment); void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -458,6 +458,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; GlobalsMetadata GlobalsMD; + DenseMap<AllocaInst *, bool> ProcessedAllocas; friend struct FunctionStackPoisoner; }; @@ -804,13 +805,21 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { } /// Check if we want (and can) handle this alloca. -bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0 && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { + auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); + + if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) + return PreviouslySeenAllocaInfo->getSecond(); + + bool IsInteresting = (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. 
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); + + ProcessedAllocas[&AI] = IsInteresting; + return IsInteresting; } /// If I is an interesting memory access, return the PointerOperand @@ -818,7 +827,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const { + unsigned *Alignment) { // Skip memory accesses inserted by another instrumentation. if (I->getMetadata("nosanitize")) return nullptr; @@ -959,18 +968,6 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, UseCalls, Exp); } -// Validate the result of Module::getOrInsertFunction called for an interface -// function of AddressSanitizer. If the instrumented module defines a function -// with the same name, their prototypes must match, otherwise -// getOrInsertFunction returns a bitcast. -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); - FuncOrBitcast->dump(); - report_fatal_error( - "trying to redefine an AddressSanitizer " - "interface function"); -} - Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, @@ -1056,7 +1053,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // path is rarely taken. This seems to be the case for SPEC benchmarks. TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); - assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); + assert(cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); @@ -1219,17 +1216,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. - AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); - AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. 
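For context on the checkInterfaceFunction removal above and the checkSanitizerInterfaceFunction calls that replace it below: Module::getOrInsertFunction returns the Function itself when the requested prototype matches, and a bitcast constant when a same-named function with a different type already exists, which is exactly what the check guards against. A minimal restatement of the deleted helper's logic (the function name here is illustrative; the shared replacement presumably lives in a common sanitizer header that this part of the diff does not show):

// Accept a real Function; anything else means getOrInsertFunction returned a
// bitcast because a same-named symbol has a mismatched prototype.
static Function *requireInterfaceFunction(Constant *FuncOrBitcast) {
  if (auto *F = dyn_cast<Function>(FuncOrBitcast))
    return F;
  report_fatal_error("trying to redefine a sanitizer interface function");
}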
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterGlobals = checkInterfaceFunction( + AsanUnregisterGlobals = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); @@ -1317,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Indices2[1] = IRB.getInt32(0); G->replaceAllUsesWith( - ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true)); + ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); NewGlobal->takeName(G); G->eraseFromParent(); @@ -1399,44 +1396,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string ExpStr = Exp ? "exp_" : ""; const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; AsanErrorCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + TypeStr + "_n", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); } } } - AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemset = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); - AsanHandleNoReturnFunc = checkInterfaceFunction( + AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); - AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrSubFunction = 
checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -1461,7 +1458,7 @@ bool AddressSanitizer::doInitialization(Module &M) { BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); // call __asan_init in the module ctor. IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); - AsanInitFunction = checkInterfaceFunction( + AsanInitFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr)); AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); @@ -1613,16 +1610,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { std::string Suffix = itostr(i); - AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr)); - AsanStackFreeFunc[i] = checkInterfaceFunction( + AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, + IntptrTy, nullptr)); + AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } - AsanPoisonStackMemoryFunc = checkInterfaceFunction( + AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanUnpoisonStackMemoryFunc = checkInterfaceFunction( + AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } @@ -1757,9 +1755,11 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca in presence of inline asm: too often it - // makes assumptions on which registers are available. + // Don't do dynamic alloca in presence of inline asm: too often it makes + // assumptions on which registers are available. Don't do stack malloc in the + // presence of inline asm on 32-bit platforms for the same reason. bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; + DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b3925ee..06d5aed 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -753,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // Patch the pointer to LLVM function in debug info descriptor. 
auto DI = FunctionDIs.find(&F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(&F); + DI->second->replaceFunction(&F); UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -1075,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } case 2: { IRBuilder<> IRB(Pos); - Value *ShadowAddr1 = - IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); + Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr, + ConstantInt::get(DFS.IntptrTy, 1)); return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign), IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos); } @@ -1127,7 +1127,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); DT.addNewBlock(NextBB, LastBr->getParent()); IRBuilder<> NextIRB(NextBB); - WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); + WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, + ConstantInt::get(DFS.IntptrTy, 1)); Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign); ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); LastBr->setSuccessor(0, NextBB); @@ -1213,7 +1214,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowVecAddr = IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); do { - Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); + Value *CurShadowVecAddr = + IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); Size -= ShadowVecSize; ++Offset; @@ -1221,7 +1223,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Offset *= ShadowVecSize; } while (Size > 0) { - Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); + Value *CurShadowAddr = + IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset); IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign); --Size; ++Offset; @@ -1469,17 +1472,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Args.push_back(DFSF.getShadow(*i)); if (FT->isVarArg()) { - auto LabelVAAlloca = - new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy, - CS.arg_size() - FT->getNumParams()), - "labelva", DFSF.F->getEntryBlock().begin()); + auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, + CS.arg_size() - FT->getNumParams()); + auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", + DFSF.F->getEntryBlock().begin()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { - auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n); + auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr); } - Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0)); + Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); } if (!FT->getReturnType()->isVoidTy()) { @@ -1565,10 +1568,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) { ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); - Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); + Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { - IRB.CreateStore(DFSF.getShadow(*i), - IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); + IRB.CreateStore( + DFSF.getShadow(*i), + IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n)); Args.push_back(*i); } } 
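[Editor's note] The CreateGEP/CreateConstGEP1_32/CreateStructGEP churn above is one instance of an API revision the whole rebase adapts to: IRBuilder's GEP factories now take the pointee type as an explicit leading argument, groundwork for decoupling GEPs from pointer element types. The same pattern recurs in GCOVProfiler, MemorySanitizer, GVN, and IndVarSimplify below. A sketch of the shape change, assuming the signatures the hunks themselves use; the helper is illustrative:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Step one shadow slot past ShadowAddr, as DFSan's loadShadow does above.
    static Value *nextShadowSlot(IRBuilder<> &IRB, Type *ShadowTy,
                                 Value *ShadowAddr, Type *IntptrTy) {
      // Old: IRB.CreateGEP(ShadowAddr, ConstantInt::get(IntptrTy, 1));
      // New: the element type is spelled out; passing nullptr still means
      // "infer it from the pointer operand" during the transition.
      return IRB.CreateGEP(ShadowTy, ShadowAddr, ConstantInt::get(IntptrTy, 1));
    }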
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a793e69..368a81d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -149,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { return new GCOVProfiler(Options); } -static StringRef getFunctionName(DISubprogram SP) { - if (!SP.getLinkageName().empty()) - return SP.getLinkageName(); - return SP.getName(); +static StringRef getFunctionName(MDSubprogram *SP) { + if (!SP->getLinkageName().empty()) + return SP->getLinkageName(); + return SP->getName(); } namespace { @@ -163,7 +163,7 @@ namespace { static const char *const BlockTag; static const char *const EdgeTag; - GCOVRecord() {} + GCOVRecord() = default; void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); @@ -315,7 +315,7 @@ namespace { ReturnBlock(1, os) { this->os = os; - Function *F = SP.getFunction(); + Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -330,7 +330,7 @@ namespace { std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); - FNLOS << getFunctionName(SP) << SP.getLineNumber(); + FNLOS << getFunctionName(SP) << SP->getLine(); FNLOS.flush(); FuncChecksum = hash_value(FunctionNameAndLine); } @@ -366,7 +366,7 @@ namespace { void writeOut() { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + - 1 + lengthOfGCOVString(SP.getFilename()) + 1; + 1 + lengthOfGCOVString(SP->getFilename()) + 1; if (UseCfgChecksum) ++BlockLen; write(BlockLen); @@ -375,8 +375,8 @@ namespace { if (UseCfgChecksum) write(CfgChecksum); writeGCOVString(getFunctionName(SP)); - writeGCOVString(SP.getFilename()); - write(SP.getLineNumber()); + writeGCOVString(SP->getFilename()); + write(SP->getLine()); // Emit count of blocks. writeBytes(BlockTag, 4); @@ -437,12 +437,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { } } - SmallString<128> Filename = CU.getFilename(); + SmallString<128> Filename = CU->getFilename(); sys::path::replace_extension(Filename, NewStem); StringRef FName = sys::path::filename(Filename); SmallString<128> CurPath; if (sys::fs::current_path(CurPath)) return FName; - sys::path::append(CurPath, FName.str()); + sys::path::append(CurPath, FName); return CurPath.str(); } @@ -466,7 +466,8 @@ static bool functionHasLines(Function *F) { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; @@ -486,21 +487,14 @@ void GCOVProfiler::emitProfileNotes() { // this pass over the original .o's as they're produced, or run it after // LTO, we'll generate the same .gcno files. 
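[Editor's note] functionHasLines above also picks up this rebase's DebugLoc change: DebugLoc now wraps a location node and converts to bool, so "no location" is spelled !Loc rather than Loc.isUnknown() (SanitizerCoverage below makes the same switch). A small sketch of the idiom; the helper name is made up:

    #include "llvm/IR/DebugLoc.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // True when I carries a real source line; line 0 marks compiler-generated
    // code such as calls to global constructors.
    static bool hasRealLine(const Instruction &I) {
      const DebugLoc &Loc = I.getDebugLoc();
      if (!Loc) // previously: Loc.isUnknown()
        return false;
      return Loc.getLine() != 0;
    }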
- DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::error_code EC; raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None); std::string EdgeDestinations; - DIArray SPs = CU.getSubprograms(); unsigned FunctionIdent = 0; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; @@ -536,16 +530,18 @@ void GCOVProfiler::emitProfileNotes() { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; if (Line == Loc.getLine()) continue; Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + if (SP != getDISubprogram(Loc.getScope())) + continue; - GCOVLines &Lines = Block.getFile(SP.getFilename()); + GCOVLines &Lines = Block.getFile(SP->getFilename()); Lines.addLine(Loc.getLine()); } } @@ -574,16 +570,10 @@ bool GCOVProfiler::emitProfileArcs() { bool Result = false; bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); - DIArray SPs = CU.getSubprograms(); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -603,7 +593,7 @@ bool GCOVProfiler::emitProfileArcs() { GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + CountersBySP.push_back(std::make_pair(Counters, SP)); UniqueVector<BasicBlock *> ComplexEdgePreds; UniqueVector<BasicBlock *> ComplexEdgeSuccs; @@ -628,7 +618,8 @@ bool GCOVProfiler::emitProfileArcs() { SmallVector<Value *, 2> Idx; Idx.push_back(Builder.getInt64(0)); Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(), + Counters, Idx); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); @@ -855,7 +846,7 @@ Function *GCOVProfiler::insertCounterWriteout( NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); uint32_t CfgChecksum = FileChecksums.empty() ? 
0 : FileChecksums[i]; Builder.CreateCall3(StartFile, @@ -863,7 +854,7 @@ Function *GCOVProfiler::insertCounterWriteout( Builder.CreateGlobalStringPtr(ReversedVersion), Builder.getInt32(CfgChecksum)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { - DISubprogram SP(CountersBySP[j].second); + auto *SP = cast_or_null<MDSubprogram>(CountersBySP[j].second); uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); Builder.CreateCall5( EmitFunction, Builder.getInt32(j), @@ -922,7 +913,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); Arg = std::next(Fn->arg_begin()); Arg->setName("counters"); - Value *GEP = Builder.CreateGEP(Arg, ZExtPred); + Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); Cond = Builder.CreateICmpEQ(Counter, Constant::getNullValue( diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c2aa1e2..2b35066 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -623,8 +623,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0)); for (unsigned i = 0; i < Size / IntptrSize; ++i) { - Value *Ptr = - i ? IRB.CreateConstGEP1_32(IntptrOriginPtr, i) : IntptrOriginPtr; + Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) + : IntptrOriginPtr; IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); Ofs += IntptrSize / kOriginSize; CurrentAlignment = IntptrAlignment; @@ -632,7 +632,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { - Value *GEP = i ? IRB.CreateConstGEP1_32(OriginPtr, i) : OriginPtr; + Value *GEP = + i ? 
IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr; IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); CurrentAlignment = kMinOriginAlignment; } @@ -2843,7 +2844,8 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr = MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); - Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset); + Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, + AMD64FpEndOffset); IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); } } diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 289675e..662513d 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -140,16 +140,6 @@ class SanitizerCoverageModule : public ModulePass { } // namespace -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - std::string Err; - raw_string_ostream Stream(Err); - Stream << "SanitizerCoverage interface function redefined: " - << *FuncOrBitcast; - report_fatal_error(Err); -} - bool SanitizerCoverageModule::runOnModule(Module &M) { if (!CoverageLevel) return false; C = &(M.getContext()); @@ -167,16 +157,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc)); appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority); - SanCovFunction = checkInterfaceFunction( + SanCovFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr)); - SanCovWithCheckFunction = checkInterfaceFunction( + SanCovWithCheckFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); - SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); - SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); - - SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + SanCovIndirCallFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); + SanCovTraceCmpFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + + SanCovModuleInit = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy, nullptr)); SanCovModuleInit->setLinkage(Function::ExternalLinkage); @@ -186,9 +178,9 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { /*hasSideEffects=*/true); if (ClExperimentalTracing) { - SanCovTraceEnter = checkInterfaceFunction( + SanCovTraceEnter = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkInterfaceFunction( + SanCovTraceBB = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); } @@ -316,7 +308,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); - if (dyn_cast<InlineAsm>(Callee)) continue; + if (isa<InlineAsm>(Callee)) continue; GlobalVariable 
*CalleeCache = new GlobalVariable( *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); @@ -366,8 +358,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } bool IsEntryBB = &BB == &F.getEntryBlock(); - DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() - ? IP->getDebugLoc().getFnDebugLoc(*C) + DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc() + ? IP->getDebugLoc().getFnDebugLoc() : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c3ba722..aa8ee5a 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -129,54 +129,48 @@ FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - FuncOrBitcast->dump(); - report_fatal_error("ThreadSanitizer interface function redefined"); -} - void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); // Initialize the callbacks. - TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( + TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_func_exit", IRB.getVoidTy(), nullptr)); + TsanFuncExit = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const size_t ByteSize = 1 << i; const size_t BitSize = ByteSize * 8; SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); - TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); - TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + itostr(ByteSize)); - TsanUnalignedRead[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedRead[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + itostr(ByteSize)); - TsanUnalignedWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedWrite[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + "_load"); - TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); + TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); SmallString<32> 
AtomicStoreName("__tsan_atomic" + itostr(BitSize) + "_store"); - TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, - nullptr)); + TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { @@ -199,34 +193,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); - TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( - RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); + TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + "_compare_exchange_val"); - TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); } - TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), nullptr)); - TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + TsanVptrUpdate = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); + TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr)); - TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr)); - MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( - "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); - MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( - "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IntptrTy, nullptr)); - MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( - "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), - IntptrTy, nullptr)); + MemmoveFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemcpyFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemsetFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt32Ty(), IntptrTy, nullptr)); } bool ThreadSanitizer::doInitialization(Module &M) { diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 87de33b..d4fef10 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -54,8 +54,6 @@ public: RetainAutorelease(nullptr), RetainAutoreleaseRV(nullptr) { } - ~ARCRuntimeEntryPoints() { } - void init(Module *M) { TheModule = M; AutoreleaseRV = nullptr; diff --git 
a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index b197c97..4edd029 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -45,7 +45,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, default: break; } - ImmutableCallSite CS = static_cast<const Value *>(Inst); + ImmutableCallSite CS(Inst); assert(CS && "Only calls can alter reference counts!"); // See if AliasAnalysis can help us with the call. @@ -99,7 +99,7 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, // of any other dynamic reference-counted pointers. if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + } else if (auto CS = ImmutableCallSite(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5aa2b97..8918909 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass { initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTreeWrapperPass>(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index cf30f39..16f2ead 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \ DeadStoreElimination.cpp \ EarlyCSE.cpp \ FlattenCFGPass.cpp \ + Float2Int.cpp \ GVN.cpp \ IndVarSimplify.cpp \ InductiveRangeCheckElimination.cpp \ @@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \ LowerExpectIntrinsic.cpp \ MemCpyOptimizer.cpp \ MergedLoadStoreMotion.cpp \ + NaryReassociate.cpp \ PartiallyInlineLibCalls.cpp \ PlaceSafepoints.cpp \ Reassociate.cpp \ diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d12fdb7..c29fcc3 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts DeadStoreElimination.cpp EarlyCSE.cpp FlattenCFGPass.cpp + Float2Int.cpp GVN.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp @@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp + NaryReassociate.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp Reassociate.cpp diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb8981b..01952cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { return true; } } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { if (Function *F = CS.getCalledFunction()) { if (TLI && TLI->has(LibFunc::strcpy) && F->getName() == TLI->getName(LibFunc::strcpy)) { @@ -262,7 +262,7 @@ 
static bool isRemovable(Instruction *I) {
    }
  }

-  if (CallSite CS = I)
+  if (auto CS = CallSite(I))
    return CS.getInstruction()->use_empty();

  return false;
@@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
    }
  }

-  CallSite CS = I;
+  CallSite CS(I);
  // All the supported functions so far happen to have dest as their first
  // argument.
  return CS.getArgument(0);
@@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
      continue;
    }

-    if (CallSite CS = cast<Value>(BBI)) {
+    if (auto CS = CallSite(BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(BBI, TLI))
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp, sitofp), we terminate our walk.

+/// The largest integer type worth dealing with.
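// [Editor's illustration, not part of the patch] Concretely, assuming the
// default 64-bit budget declared just below, the pass aims to turn integer
// values that round-trip through 'float',
//
//   %af = sitofp i16 %a to float
//   %bf = sitofp i16 %b to float
//   %sf = fadd float %af, %bf
//   %s  = fptosi float %sf to i32
//
// into a pure integer computation (an i16 sum needs at most 17 bits):
//
//   %ae = sext i16 %a to i32
//   %be = sext i16 %b to i32
//   %s  = add i32 %ae, %be
//
// (The cl::opt below caps this bitwidth budget.)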
+static cl::opt<unsigned> +MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden, + cl::desc("Max integer bitwidth to consider in float2int" + "(default=64)")); + +namespace { + struct Float2Int : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + Float2Int() : FunctionPass(ID) { + initializeFloat2IntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + + void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots); + ConstantRange seen(Instruction *I, ConstantRange R); + ConstantRange badRange(); + ConstantRange unknownRange(); + ConstantRange validateRange(ConstantRange R); + void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots); + void walkForwards(); + bool validateAndTransform(); + Value *convert(Instruction *I, Type *ToTy); + void cleanup(); + + MapVector<Instruction*, ConstantRange > SeenInsts; + SmallPtrSet<Instruction*,8> Roots; + EquivalenceClasses<Instruction*> ECs; + MapVector<Instruction*, Value*> ConvertedInsts; + LLVMContext *Ctx; + }; +} + +char Float2Int::ID = 0; +INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) + +// Given a FCmp predicate, return a matching ICmp predicate if one +// exists, otherwise return BAD_ICMP_PREDICATE. +static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) { + switch (P) { + case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: + return CmpInst::ICMP_EQ; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + return CmpInst::ICMP_SGT; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: + return CmpInst::ICMP_SGE; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + return CmpInst::ICMP_SLT; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: + return CmpInst::ICMP_SLE; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: + return CmpInst::ICMP_NE; + default: + return CmpInst::BAD_ICMP_PREDICATE; + } +} + +// Given a floating point binary operator, return the matching +// integer version. +static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { + switch (Opcode) { + default: llvm_unreachable("Unhandled opcode!"); + case Instruction::FAdd: return Instruction::Add; + case Instruction::FSub: return Instruction::Sub; + case Instruction::FMul: return Instruction::Mul; + } +} + +// Find the roots - instructions that convert from the FP domain to +// integer domain. +void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) { + for (auto &I : inst_range(F)) { + switch (I.getOpcode()) { + default: break; + case Instruction::FPToUI: + case Instruction::FPToSI: + Roots.insert(&I); + break; + case Instruction::FCmp: + if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) != + CmpInst::BAD_ICMP_PREDICATE) + Roots.insert(&I); + break; + } + } +} + +// Helper - mark I as having been traversed, having range R. +ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) { + DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n"); + if (SeenInsts.find(I) != SeenInsts.end()) + SeenInsts.find(I)->second = R; + else + SeenInsts.insert(std::make_pair(I, R)); + return R; +} + +// Helper - get a range representing a poison value. 
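// [Editor's note, not part of the patch] In the helpers below,
// ConstantRange(N, true) is the full set at bitwidth N and
// ConstantRange(N, false) the empty set. At width MaxIntegerBW + 1, "full"
// therefore doubles as the poison/give-up marker, while "empty" means "no
// information yet" and is the identity for the unions taken later in
// validateAndTransform.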
+ConstantRange Float2Int::badRange() {
+  return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+  return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+  if (R.getBitWidth() > MaxIntegerBW + 1)
+    return badRange();
+  return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+//   - walkBackwards: A breadth-first walk of the use-def graph starting from
+//       the roots. Populate "SeenInsts" with interesting
+//       instructions and poison values if they're obvious and
+//       cheap to compute. Calculate the equivalence set structure
+//       while we're here too.
+//   - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+//       defs before their uses. Calculate the real range info.
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+  std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+
+    if (SeenInsts.find(I) != SeenInsts.end())
+      // Seen already.
+      continue;
+
+    switch (I->getOpcode()) {
+    // FIXME: Handle select and phi nodes.
+    default:
+      // Path terminated uncleanly.
+      seen(I, badRange());
+      break;
+
+    case Instruction::UIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+      APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(Min, Max)));
+      continue;
+    }
+
+    case Instruction::SIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+      APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(SMin, SMax)));
+      continue;
+    }
+
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FCmp:
+      seen(I, unknownRange());
+      break;
+    }
+
+    for (Value *O : I->operands()) {
+      if (Instruction *OI = dyn_cast<Instruction>(O)) {
+        // Unify def-use chains if they interfere.
+        ECs.unionSets(I, OI);
+        if (SeenInsts.find(I)->second != badRange())
+          Worklist.push_back(OI);
+      } else if (!isa<ConstantFP>(O)) {
+        // Not an instruction or ConstantFP? We can't do anything.
+        seen(I, badRange());
+      }
+    }
+  }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+  for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+    if (It->second != unknownRange())
+      continue;
+
+    Instruction *I = It->first;
+    std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+    switch (I->getOpcode()) {
+    // FIXME: Handle select and phi nodes.
+ default: + case Instruction::UIToFP: + case Instruction::SIToFP: + llvm_unreachable("Should have been handled in walkForwards!"); + + case Instruction::FAdd: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FAdd is a binary operator!"); + return Ops[0].add(Ops[1]); + }; + break; + + case Instruction::FSub: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FSub is a binary operator!"); + return Ops[0].sub(Ops[1]); + }; + break; + + case Instruction::FMul: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FMul is a binary operator!"); + return Ops[0].multiply(Ops[1]); + }; + break; + + // + // Root-only instructions - we'll only see these if they're the + // first node in a walk. + // + case Instruction::FPToUI: + case Instruction::FPToSI: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!"); + return Ops[0]; + }; + break; + + case Instruction::FCmp: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FCmp is a binary operator!"); + return Ops[0].unionWith(Ops[1]); + }; + break; + } + + bool Abort = false; + SmallVector<ConstantRange,4> OpRanges; + for (Value *O : I->operands()) { + if (Instruction *OI = dyn_cast<Instruction>(O)) { + assert(SeenInsts.find(OI) != SeenInsts.end() && + "def not seen before use!"); + OpRanges.push_back(SeenInsts.find(OI)->second); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) { + // Work out if the floating point number can be losslessly represented + // as an integer. + // APFloat::convertToInteger(&Exact) purports to do what we want, but + // the exactness can be too precise. For example, negative zero can + // never be exactly converted to an integer. + // + // Instead, we ask APFloat to round itself to an integral value - this + // preserves sign-of-zero - then compare the result with the original. + // + APFloat F = CF->getValueAPF(); + + // First, weed out obviously incorrect values. Non-finite numbers + // can't be represented and neither can negative zero, unless + // we're in fast math mode. + if (!F.isFinite() || + (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) && + !I->hasNoSignedZeros())) { + seen(I, badRange()); + Abort = true; + break; + } + + APFloat NewF = F; + auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven); + if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) { + seen(I, badRange()); + Abort = true; + break; + } + // OK, it's representable. Now get it. + APSInt Int(MaxIntegerBW+1, false); + bool Exact; + CF->getValueAPF().convertToInteger(Int, + APFloat::rmNearestTiesToEven, + &Exact); + OpRanges.push_back(ConstantRange(Int)); + } else { + llvm_unreachable("Should have already marked this as badRange!"); + } + } + + // Reduce the operands' ranges to a single range and return. + if (!Abort) + seen(I, Op(OpRanges)); + } +} + +// If there is a valid transform to be done, do it. +bool Float2Int::validateAndTransform() { + bool MadeChange = false; + + // Iterate over every disjoint partition of the def-use graph. + for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) { + ConstantRange R(MaxIntegerBW + 1, false); + bool Fail = false; + Type *ConvertedToTy = nullptr; + + // For every member of the partition, union all the ranges together. 
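// [Editor's sketch, not part of the patch] The ConstantFP handling above,
// distilled into a standalone predicate (needs llvm/ADT/APFloat.h): round to
// an integral value and compare, instead of trusting convertToInteger's
// Exact flag. Note that this deliberately accepts -0.0; the sign-of-zero
// policy is enforced by the separate fast-math check above.
//
//   static bool roundTripsExactly(const APFloat &F) {
//     if (!F.isFinite())
//       return false;
//     APFloat Rounded = F;
//     APFloat::opStatus Res =
//         Rounded.roundToIntegral(APFloat::rmNearestTiesToEven);
//     return Res == APFloat::opOK && Rounded.compare(F) == APFloat::cmpEqual;
//   }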
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) { + Instruction *I = *MI; + auto SeenI = SeenInsts.find(I); + if (SeenI == SeenInsts.end()) + continue; + + R = R.unionWith(SeenI->second); + // We need to ensure I has no users that have not been seen. + // If it does, transformation would be illegal. + // + // Don't count the roots, as they terminate the graphs. + if (Roots.count(I) == 0) { + // Set the type of the conversion while we're here. + if (!ConvertedToTy) + ConvertedToTy = I->getType(); + for (User *U : I->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + if (!UI || SeenInsts.find(UI) == SeenInsts.end()) { + DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n"); + Fail = true; + break; + } + } + } + if (Fail) + break; + } + + // If the set was empty, or we failed, or the range is poisonous, + // bail out. + if (ECs.member_begin(It) == ECs.member_end() || Fail || + R.isFullSet() || R.isSignWrappedSet()) + continue; + assert(ConvertedToTy && "Must have set the convertedtoty by this point!"); + + // The number of bits required is the maximum of the upper and + // lower limits, plus one so it can be signed. + unsigned MinBW = std::max(R.getLower().getMinSignedBits(), + R.getUpper().getMinSignedBits()) + 1; + DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n"); + + // If we've run off the realms of the exactly representable integers, + // the floating point result will differ from an integer approximation. + + // Do we need more bits than are in the mantissa of the type we converted + // to? semanticsPrecision returns the number of mantissa bits plus one + // for the sign bit. + unsigned MaxRepresentableBits + = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; + if (MinBW > MaxRepresentableBits) { + DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n"); + continue; + } + if (MinBW > 64) { + DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n"); + continue; + } + + // OK, R is known to be representable. Now pick a type for it. + // FIXME: Pick the smallest legal type that will fit. + Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx); + + for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) + convert(*MI, Ty); + MadeChange = true; + } + + return MadeChange; +} + +Value *Float2Int::convert(Instruction *I, Type *ToTy) { + if (ConvertedInsts.find(I) != ConvertedInsts.end()) + // Already converted this instruction. + return ConvertedInsts[I]; + + SmallVector<Value*,4> NewOperands; + for (Value *V : I->operands()) { + // Don't recurse if we're an instruction that terminates the path. + if (I->getOpcode() == Instruction::UIToFP || + I->getOpcode() == Instruction::SIToFP) { + NewOperands.push_back(V); + } else if (Instruction *VI = dyn_cast<Instruction>(V)) { + NewOperands.push_back(convert(VI, ToTy)); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false); + bool Exact; + CF->getValueAPF().convertToInteger(Val, + APFloat::rmNearestTiesToEven, + &Exact); + NewOperands.push_back(ConstantInt::get(ToTy, Val)); + } else { + llvm_unreachable("Unhandled operand type?"); + } + } + + // Now create a new instruction. 
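// [Editor's note, not part of the patch] A concrete instance of the
// MaxRepresentableBits guard above: IEEE single precision carries 24
// significand bits, so 16777217 (2^24 + 1) is the first positive integer a
// 'float' cannot hold. If a partition's range needs more bits than the
// mantissa provides, the FP computation was already rounding, and exact
// integer arithmetic would no longer match it, so the demotion is skipped.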
+ IRBuilder<> IRB(I); + Value *NewV = nullptr; + switch (I->getOpcode()) { + default: llvm_unreachable("Unhandled instruction!"); + + case Instruction::FPToUI: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FPToSI: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FCmp: { + CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate()); + assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!"); + NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName()); + break; + } + + case Instruction::UIToFP: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::SIToFP: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()), + NewOperands[0], NewOperands[1], + I->getName()); + break; + } + + // If we're a root instruction, RAUW. + if (Roots.count(I)) + I->replaceAllUsesWith(NewV); + + ConvertedInsts[I] = NewV; + return NewV; +} + +// Perform dead code elimination on the instructions we just modified. +void Float2Int::cleanup() { + for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); + I != E; ++I) + I->first->eraseFromParent(); +} + +bool Float2Int::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); + // Clear out all state. + ECs = EquivalenceClasses<Instruction*>(); + SeenInsts.clear(); + ConvertedInsts.clear(); + Roots.clear(); + + Ctx = &F.getParent()->getContext(); + + findRoots(F, Roots); + + walkBackwards(Roots); + walkForwards(); + + bool Modified = validateAndTransform(); + if (Modified) + cleanup(); + return Modified; +} + +FunctionPass *llvm::createFloat2IntPass() { + return new Float2Int(); +} + diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c73e60f..97d5b6d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, DL)) return Offset; @@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, DL); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 51e8041..ab8e5b8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. 
//===----------------------------------------------------------------------===// -/// Check for expressions that ScalarEvolution generates to compute -/// BackedgeTakenInfo. If these expressions have not been reduced, then -/// expanding them may incur additional cost (albeit in the loop preheader). -static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, - SmallPtrSetImpl<const SCEV*> &Processed, - ScalarEvolution *SE) { - if (!Processed.insert(S).second) - return false; - - // If the backedge-taken count is a UDiv, it's very likely a UDiv that - // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a - // precise expression, rather than a UDiv from the user's code. If we can't - // find a UDiv in the code with some simple searching, assume the former and - // forego rewriting the loop. - if (isa<SCEVUDivExpr>(S)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return true; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != S) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != S) - return true; - } - } - - // Recurse past add expressions, which commonly occur in the - // BackedgeTakenCount. They may already exist in program code, and if not, - // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { - if (isHighCostExpansion(*I, BI, Processed, SE)) - return true; - } - return false; - } - - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. - return false; -} - /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. @@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint). /// However, we don't yet have a strong motivation for converting loop tests /// into inequality tests. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, + SCEVExpander &Rewriter) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || BackedgeTakenCount->isZero()) @@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { return false; // Can't rewrite non-branch yet. 
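[Editor's note] The large deletion above is a relocation rather than a removal: this rebase moves the expansion-cost heuristic into SCEVExpander, and the updated canExpandBackedgeTakenCount (continued below) queries the expander directly. A sketch of the new call shape, assuming the isHighCostExpansion signature the hunk itself uses; the wrapper is illustrative:

    #include "llvm/Analysis/ScalarEvolutionExpander.h"
    using namespace llvm;

    // The UDiv/add/max special cases formerly hand-rolled in IndVarSimplify
    // now sit behind a single query on the expander.
    static bool cheapToExpand(const SCEV *BackedgeTakenCount, Loop *L,
                              SCEVExpander &Rewriter) {
      return !Rewriter.isHighCostExpansion(BackedgeTakenCount, L);
    }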
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) + if (!isa<BranchInst>(L->getExitingBlock()->getTerminator())) return false; - SmallPtrSet<const SCEV*, 8> Processed; - if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) + if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) return false; return true; @@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); - return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); + return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit"); } else { // In any other case, convert both IVInit and IVCount to integers before @@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); // Initialize CmpIndVar and IVCount to their preincremented values. Value *CmpIndVar = IndVar; @@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { + if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 1f33f72..c19cd19 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -41,9 +41,9 @@ struct PointerOffsetPair { }; struct LoadPOPPair { + LoadPOPPair() = default; LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) : Load(L), POP(P), InsertOrder(O) {} - LoadPOPPair() {} LoadInst *Load; PointerOffsetPair POP; /// \brief The new load needs to be created before the first load in IR order. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 8445d5f..099f227 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -28,7 +28,7 @@ // // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however // it's useful to think about these as the same register, with some uses using -// the value of the register before the add and some using // it after. In this +// the value of the register before the add and some using it after. In this // example, the icmp is a post-increment user, since it uses %i.next, which is // the value of the induction variable after the increment. The other common // case of post-increment users is users outside the loop. @@ -112,8 +112,6 @@ public: /// a particular register. 
SmallBitVector UsedByIndices; - RegSortData() {} - void print(raw_ostream &OS) const; void dump() const; }; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 600cbde..3de3b3d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -165,6 +165,7 @@ namespace { UP.MaxCount = UINT_MAX; UP.Partial = CurrentAllowPartial; UP.Runtime = CurrentRuntime; + UP.AllowExpensiveTripCount = false; TTI.getUnrollingPreferences(L, UP); } @@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, - &LPM, &AC)) + if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, + TripMultiple, LI, this, &LPM, &AC)) return false; return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2b5a078..d651473 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) RepeatInstruction = processMemMove(M); - else if (CallSite CS = (Value*)I) { + else if (auto CS = CallSite(I)) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) if (CS.isByValArgument(i)) MadeChange |= processByValArgument(CS, i); diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp new file mode 100644 index 0000000..fea7641 --- /dev/null +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -0,0 +1,252 @@ +//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates n-ary add expressions and eliminates the redundancy +// exposed by the reassociation. +// +// A motivating example: +// +// void foo(int a, int b) { +// bar(a + b); +// bar((a + 2) + b); +// } +// +// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify +// the above code to +// +// int t = a + b; +// bar(t); +// bar(t + 2); +// +// However, the Reassociate pass is unable to do that because it processes each +// instruction individually and believes (a + 2) + b is the best form according +// to its rank system. +// +// To address this limitation, NaryReassociate reassociates an expression in a +// form that reuses existing instructions. As a result, NaryReassociate can +// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that +// (a + b) is computed before. +// +// NaryReassociate works as follows. For every instruction in the form of (a + +// b) + c, it checks whether a + c or b + c is already computed by a dominating +// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + +// c) + a and removes the redundancy accordingly. To efficiently look up whether +// an expression is computed before, we store each instruction seen and its SCEV +// into an SCEV-to-instruction map. +// +// Although the algorithm pattern-matches only ternary additions, it +// automatically handles many >3-ary expressions by walking through the function +// in the depth-first order. 
For example, given +// +// (a + c) + d +// ((a + b) + c) + d +// +// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites +// ((a + c) + b) + d into ((a + c) + d) + b. +// +// Finally, the above dominator-based algorithm may need to be run for multiple +// iterations before emitting optimal code. One source of this need is that we +// only split an operand when it is used only once. The above algorithm can +// eliminate an instruction and decrease the usage count of its operands. As a +// result, an instruction that previously had multiple uses may become a +// single-use instruction and thus eligible for split consideration. For +// example, +// +// ac = a + c +// ab = a + b +// abc = ab + c +// ab2 = ab + b +// ab2c = ab2 + c +// +// In the first iteration, we cannot reassociate abc to ac+b because ab is used +// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a +// result, ab2 becomes dead and ab will be used only once in the second +// iteration. +// +// Limitations and TODO items: +// +// 1) We only consider n-ary adds for now. This should be extended and +// generalized. +// +// 2) Besides arithmetic operations, similar reassociation can be applied to +// GEPs. For example, if +// X = &arr[a] +// dominates +// Y = &arr[a + b] +// we may rewrite Y into X + b. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "nary-reassociate" + +namespace { +class NaryReassociate : public FunctionPass { +public: + static char ID; + + NaryReassociate(): FunctionPass(ID) { + initializeNaryReassociatePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesCFG(); + } + +private: + // Runs only one iteration of the dominator-based algorithm. See the header + // comments for why we need multiple iterations. + bool doOneIteration(Function &F); + // Reassociates I to a better form. + Instruction *tryReassociateAdd(Instruction *I); + // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed. + Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I); + // Rewrites I to LHS + RHS if LHS is computed already. + Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I); + + DominatorTree *DT; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + // A lookup table quickly telling which instructions compute the given SCEV. + // Note that there can be multiple instructions at different locations + // computing to the same SCEV, so we map a SCEV to an instruction list.
For + // example, + // + // if (p1) + // foo(a + b); + // if (p2) + // bar(a + b); + DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs; +}; +} // anonymous namespace + +char NaryReassociate::ID = 0; +INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) + +FunctionPass *llvm::createNaryReassociatePass() { + return new NaryReassociate(); +} + +bool NaryReassociate::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + bool Changed = false, ChangedInThisIteration; + do { + ChangedInThisIteration = doOneIteration(F); + Changed |= ChangedInThisIteration; + } while (ChangedInThisIteration); + return Changed; +} + +bool NaryReassociate::doOneIteration(Function &F) { + bool Changed = false; + SeenExprs.clear(); + // Traverse the dominator tree in the depth-first order. This order makes sure + // all bases of a candidate are in Candidates when we process it. + for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT); + Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) { + BasicBlock *BB = Node->getBlock(); + for (auto I = BB->begin(); I != BB->end(); ++I) { + if (I->getOpcode() == Instruction::Add) { + if (Instruction *NewI = tryReassociateAdd(I)) { + Changed = true; + SE->forgetValue(I); + I->replaceAllUsesWith(NewI); + RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + I = NewI; + } + // We should add the rewritten instruction because tryReassociateAdd may + // have invalidated the original one. + SeenExprs[SE->getSCEV(I)].push_back(I); + } + } + } + return Changed; +} + +Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) { + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + if (auto *NewI = tryReassociateAdd(LHS, RHS, I)) + return NewI; + if (auto *NewI = tryReassociateAdd(RHS, LHS, I)) + return NewI; + return nullptr; +} + +Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS, + Instruction *I) { + Value *A = nullptr, *B = nullptr; + // To be conservative, we reassociate I only when it is the only user of A+B. + if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) { + // I = (A + B) + RHS + // = (A + RHS) + B or (B + RHS) + A + const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); + const SCEV *RHSExpr = SE->getSCEV(RHS); + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I)) + return NewI; + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I)) + return NewI; + } + return nullptr; +} + +Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr, + Value *RHS, Instruction *I) { + auto Pos = SeenExprs.find(LHSExpr); + // Bail out if LHSExpr is not previously seen. + if (Pos == SeenExprs.end()) + return nullptr; + + auto &LHSCandidates = Pos->second; + // Look for the closest dominator LHS of I that computes LHSExpr, and replace + // I with LHS + RHS. + // + // Because we traverse the dominator tree in the pre-order, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack. 
This + // optimization makes the algorithm O(n). + while (!LHSCandidates.empty()) { + Instruction *LHS = LHSCandidates.back(); + if (DT->dominates(LHS, I)) { + Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I); + NewI->takeName(I); + return NewI; + } + LHSCandidates.pop_back(); + } + return nullptr; +} diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 944725a..536f2a6 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, BasicBlock *Current = Pred; while (true) { for (Instruction &I : *Current) { - if (CallSite CS = &I) + if (auto CS = CallSite(&I)) // Note: Technically, needing a safepoint isn't quite the right // condition here. We should instead be checking if the target method // has an @@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F, // We need to stop going forward as soon as we see a call that can // grow the stack (i.e. the call target has a non-zero frame // size). - if (CallSite CS = cursor) { - (void)CS; // Silence an unused variable warning by gcc 4.8.2 + if (CallSite(cursor)) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) { // llvm.assume(...) are not really calls. if (II->getIntrinsicID() == Intrinsic::assume) { diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f5d21ff..ba48e0a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden, // Print the liveset found at the insert location static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false)); -static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, + cl::init(false)); // Print out the base pointers for debugging -static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, + cl::init(false)); + +#ifdef XDEBUG +static bool ClobberNonLive = true; +#else +static bool ClobberNonLive = false; +#endif +static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", + cl::location(ClobberNonLive), + cl::Hidden); namespace { struct RewriteStatepointsForGC : public FunctionPass { @@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) namespace { +struct GCPtrLivenessData { + /// Values defined in this block. + DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; + /// Values used in this block (and thus live); does not include values + /// killed within this block. + DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; + + /// Values live into this basic block (i.e. used by any + /// instruction in this basic block or ones reachable from here) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; + + /// Values live out of this basic block (i.e.
live into + /// any successor block) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; +}; + // The type of the internal cache used inside the findBasePointers family // of functions. From the callers perspective, this is an opaque type and // should not be inspected. @@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord { /// Mapping from live pointers to a base-defining-value DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - /// Any new values which were added to the IR during base pointer analysis - /// for this safepoint - DenseSet<llvm::Value *> NewInsertedDefs; - /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. Instruction *StatepointToken; @@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord { }; } +/// Compute the live-in set for every basic block in the function +static void computeLiveInValues(DominatorTree &DT, Function &F, + GCPtrLivenessData &Data); + +/// Given results from the dataflow liveness computation, find the set of live +/// Values at a particular instruction. +static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, + StatepointLiveSetTy &out); + // TODO: Once we can get to the GCStrategy, this becomes // Optional<bool> isGCManagedPointer(const Value *V) const override { @@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) { return false; } -/// Return true if the Value is a gc reference type which is potentially used -/// after the instruction 'loc'. This is only used with the edge reachability -/// liveness code. Note: It is assumed the V dominates loc. -static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT, - LoopInfo *LI) { - if (!isGCPointerType(V.getType())) - return false; - - if (V.use_empty()) - return false; - - // Given assumption that V dominates loc, this may be live - return true; +// Return true if this type is one which a) is a gc pointer or contains a GC +// pointer and b) is of a type this code expects to encounter as a live value. +// (The insertion code will assert that a type which matches (a) and not (b) +// is not encountered.) +static bool isHandledGCPointerType(Type *T) { + // We fully support gc pointers + if (isGCPointerType(T)) + return true; + // We partially support vectors of gc pointers. The code will assert if it + // can't handle something. + if (auto VT = dyn_cast<VectorType>(T)) + if (isGCPointerType(VT->getElementType())) + return true; + return false; } #ifndef NDEBUG -static bool isAggWhichContainsGCPtrType(Type *Ty) { +/// Returns true if this type contains a gc pointer whether we know how to +/// handle that type or not. +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; if (VectorType *VT = dyn_cast<VectorType>(Ty)) return isGCPointerType(VT->getScalarType()); if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) - return isGCPointerType(AT->getElementType()) || - isAggWhichContainsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType()); if (StructType *ST = dyn_cast<StructType>(Ty)) - return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), - [](Type *SubType) { - return isGCPointerType(SubType) || - isAggWhichContainsGCPtrType(SubType); - }); + return std::any_of( + ST->subtypes().begin(), ST->subtypes().end(), + [](Type *SubType) { return containsGCPtrType(SubType); }); return false; } -#endif - -// Conservatively identifies any definitions which might be live at the -// given instruction. 
The analysis is performed immediately before the -// given instruction. Values defined by that instruction are not considered -// live. Values used by that instruction are considered live. -// -// preconditions: valid IR graph, term is either a terminator instruction or -// a call instruction, pred is the basic block of term, DT, LI are valid -// -// side effects: none, does not mutate IR -// -// postconditions: populates liveValues as discussed above -static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, - DominatorTree &DT, LoopInfo *LI, - StatepointLiveSetTy &liveValues) { - liveValues.clear(); - - assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator()); - Function *F = pred->getParent(); - - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); }; - - // Are there any gc pointer arguments live over this point? This needs to be - // special cased since arguments aren't defined in basic blocks. - for (Argument &arg : F->args()) { - assert(!isAggWhichContainsGCPtrType(arg.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(arg)) { - liveValues.insert(&arg); - } - } - - // Walk through all dominating blocks - the ones which can contain - // definitions used in this block - and check to see if any of the values - // they define are used in locations potentially reachable from the - // interesting instruction. - BasicBlock *BBI = pred; - while (true) { - if (TraceLSP) { - errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n"; - } - assert(DT.dominates(BBI, pred)); - assert(isPotentiallyReachable(BBI, pred, &DT) && - "dominated block must be reachable"); - - // Walk through the instructions in dominating blocks and keep any - // that have a use potentially reachable from the block we're - // considering putting the safepoint in - for (Instruction &inst : *BBI) { - if (TraceLSP) { - errs() << "[LSP] Looking at instruction "; - inst.dump(); - } - - if (pred == BBI && (&inst) == term) { - if (TraceLSP) { - errs() << "[LSP] stopped because we encountered the safepoint " - "instruction.\n"; - } - - // If we're in the block which defines the interesting instruction, - // we don't want to include any values as live which are defined - // _after_ the interesting line or as part of the line itself - // i.e. "term" is the call instruction for a call safepoint, the - // results of the call should not be considered live in that stackmap - break; - } - - assert(!isAggWhichContainsGCPtrType(inst.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(inst)) { - if (TraceLSP) { - errs() << "[LSP] found live value for this safepoint "; - inst.dump(); - term->dump(); - } - liveValues.insert(&inst); - } - } - if (!DT.getNode(BBI)->getIDom()) { - assert(BBI == &F->getEntryBlock() && - "failed to find a dominator for something other than " - "the entry block"); - break; - } - BBI = DT.getNode(BBI)->getIDom()->getBlock(); - } +// Returns true if this is a type which a) is a gc pointer or contains a GC +// pointer and b) is of a type which the code doesn't expect (i.e. first class +// aggregates). Used to trip assertions. +static bool isUnhandledGCPointerType(Type *Ty) { + return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); } +#endif static bool order_by_name(llvm::Value *a, llvm::Value *b) { if (a->hasName() && b->hasName()) { @@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) { } } -/// Find the initial live set. 
Note that due to base pointer -/// insertion, the live set may be incomplete. -static void -analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, - PartiallyConstructedSafepointRecord &result) { +// Conservatively identifies any definitions which might be live at the +// given instruction. The analysis is performed immediately before the +// given instruction. Values defined by that instruction are not considered +// live. Values used by that instruction are considered live. +static void analyzeParsePointLiveness( + DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, + const CallSite &CS, PartiallyConstructedSafepointRecord &result) { Instruction *inst = CS.getInstruction(); - BasicBlock *BB = inst->getParent(); StatepointLiveSetTy liveset; - findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset); + findLiveSetAtInst(inst, OriginalLivenessData, liveset); if (PrintLiveSet) { // Note: This output is used by several of the test cases @@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, result.liveset = liveset; } -/// True iff this value is the null pointer constant (of any pointer type) -static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) { - return isa<Constant>(V) && isa<PointerType>(V->getType()) && - cast<Constant>(V)->isNullValue(); +/// If we can trivially determine that this vector contains only base pointers, +/// return the base instruction. +static Value *findBaseOfVector(Value *I) { + assert(I->getType()->isVectorTy() && + cast<VectorType>(I->getType())->getElementType()->isPointerTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + + // Each case parallels findBaseDefiningValue below, see that code for + // detailed motivation. + + if (isa<Argument>(I)) + // An incoming argument to the function is a base pointer + return I; + + // We shouldn't see the address of a global as a vector value? + assert(!isa<GlobalVariable>(I) && + "unexpected global variable found in base of vector"); + + // inlining could possibly introduce phi node that contains + // undef if callee has multiple returns + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with partially optimized + // code. + return I; + + // Due to inheritance, this must be _after_ the global variable and undef + // checks + if (Constant *Con = dyn_cast<Constant>(I)) { + assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && + "order of checks wrong!"); + assert(Con->isNullValue() && "null is the only case which makes sense"); + return Con; + } + + if (isa<LoadInst>(I)) + return I; + + // Note: This code is currently rather incomplete. We are essentially only + // handling cases where the vector element is trivially a base pointer. We + // need to update the entire base pointer construction algorithm to know how + // to track vector elements and potentially scalarize, but the case which + // would motivate the work hasn't shown up in real workloads yet. + llvm_unreachable("no base found for vector element"); } /// Helper function for findBasePointer - Will return a value which either a) @@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) { assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // There are instructions which can never return gc pointer values. Sanity - // check - // that this is actually true. 
- assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && - !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers"); - assert((!isa<Instruction>(I) || isa<InvokeInst>(I) || - !cast<Instruction>(I)->isTerminator()) && - "With the exception of invoke terminators don't define values"); - assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) && - "Can't be definitions to start with"); - assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) && - "Comparisons don't give ops"); - // There's a bunch of instructions which just don't make sense to apply to - // a pointer. The only valid reason for this would be pointer bit - // twiddling which we're just not going to support. - assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) && - "Binary ops on pointer values are meaningless. Unless your " - "bit-twiddling which we don't support"); - - if (Argument *Arg = dyn_cast<Argument>(I)) { + // This case is a bit of a hack - it only handles extracts from vectors which + // trivially contain only base pointers. See note inside the function for + // how to improve this. + if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { + Value *VectorOperand = EEI->getVectorOperand(); + Value *VectorBase = findBaseOfVector(VectorOperand); + (void)VectorBase; + assert(VectorBase && "extract element not known to be a trivial base"); + return EEI; + } + + if (isa<Argument>(I)) // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - assert(Arg->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return Arg; - } + return I; - if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) { + if (isa<GlobalVariable>(I)) // base case - assert(global->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return global; - } + return I; // inlining could possibly introduce phi node that contains // undef if callee has multiple returns - if (UndefValue *undef = dyn_cast<UndefValue>(I)) { - assert(undef->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return undef; // utterly meaningless, but useful for dealing with - // partially optimized code. - } + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with + // partially optimized code. + return I; // Due to inheritance, this must be _after_ the global variable and undef // checks - if (Constant *con = dyn_cast<Constant>(I)) { + if (Constant *Con = dyn_cast<Constant>(I)) { assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && "order of checks wrong!"); // Note: Finding a constant base for something marked for relocation @@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) { // off a potentially null value and have proven it null. We also use // null pointers in dead paths of relocation phis (which we might later // want to find a base pointer for). - assert(con->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - assert(con->isNullValue() && "null is the only case which makes sense"); - return con; + assert(isa<ConstantPointerNull>(Con) && + "null is the only case which makes sense"); + return Con; } if (CastInst *CI = dyn_cast<CastInst>(I)) { - Value *def = CI->stripPointerCasts(); - assert(def->getType()->isPointerTy() && - "Base for pointer must be another pointer"); + Value *Def = CI->stripPointerCasts(); // If we find a cast instruction here, it means we've found a cast which is // not simply a pointer cast (i.e. an inttoptr). 
We don't know how to // handle int->ptr conversion. - assert(!isa<CastInst>(def) && "shouldn't find another cast here"); - return findBaseDefiningValue(def); + assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); + return findBaseDefiningValue(Def); } - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getType()->isPointerTy()) { - Value *Op = LI->getOperand(0); - (void)Op; - // Has to be a pointer to an gc object, or possibly an array of such? - assert(Op->getType()->isPointerTy()); - return LI; // The value loaded is an gc base itself - } - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - Value *Op = GEP->getOperand(0); - if (Op->getType()->isPointerTy()) { - return findBaseDefiningValue(Op); // The base of this GEP is the base - } - } + if (isa<LoadInst>(I)) + return I; // The value loaded is an gc base itself - if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) { - // An alloca represents a conceptual stack slot. It's the slot itself - // that the GC needs to know about, not the value in the slot. - assert(alloc->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return alloc; - } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + // The base of this GEP is the base + return findBaseDefiningValue(GEP->getPointerOperand()); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { + case Intrinsic::experimental_gc_result_ptr: default: // fall through to general call handling break; @@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) { case Intrinsic::experimental_gc_result_float: case Intrinsic::experimental_gc_result_int: llvm_unreachable("these don't produce pointers"); - case Intrinsic::experimental_gc_result_ptr: - // This is just a special case of the CallInst check below to handle a - // statepoint with deopt args which hasn't been rewritten for GC yet. - // TODO: Assert that the statepoint isn't rewritten yet. - return II; case Intrinsic::experimental_gc_relocate: { // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, @@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) { // We assume that functions in the source language only return base // pointers. This should probably be generalized via attributes to support // both source language and internal functions. - if (CallInst *call = dyn_cast<CallInst>(I)) { - assert(call->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return call; - } - if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { - assert(invoke->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return invoke; - } + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + return I; // I have absolutely no idea how to implement this part yet. It's not // neccessarily hard, I just haven't really looked at it yet. assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); - if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) { + if (isa<AtomicCmpXchgInst>(I)) // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it - // like a load. We loaded a pointer from a address in memory, that value - // had better be a valid base pointer. 
- return cas->getPointerOperand(); - } - if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) { - assert(AtomicRMWInst::Xchg == atomic->getOperation() && - "All others are binary ops which don't apply to base pointers"); - // semantically, a load, store pair. Treat it the same as a standard load - return atomic->getPointerOperand(); - } + // like a load. + return I; + + assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " + "binary ops which don't apply to pointers"); // The aggregate ops. Aggregates can either be in the heap or on the // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. - if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) { - return ev; - } + if (isa<ExtractValueInst>(I)) + return I; // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. @@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) { // return a value which dynamically selects from amoung several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. - if (SelectInst *select = dyn_cast<SelectInst>(I)) { - return select; - } - - return cast<PHINode>(I); + assert((isa<SelectInst>(I) || isa<PHINode>(I)) && + "missing instruction case in findBaseDefiningValing"); + return I; } /// Returns the base defining value for this value. -static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { - Value *&Cached = cache[I]; +static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { + Value *&Cached = Cache[I]; if (!Cached) { Cached = findBaseDefiningValue(I); } - assert(cache[I] != nullptr); + assert(Cache[I] != nullptr); if (TraceLSP) { - errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() + dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() << "\n"; } return Cached; @@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { /// Return a base pointer for this value if known. Otherwise, return it's /// base defining value. -static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) { - Value *def = findBaseDefiningValueCached(I, cache); - auto Found = cache.find(def); - if (Found != cache.end()) { +static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { + Value *Def = findBaseDefiningValueCached(I, Cache); + auto Found = Cache.find(Def); + if (Found != Cache.end()) { // Either a base-of relation, or a self reference. Caller must check. return Found->second; } // Only a BDV available - return def; + return Def; } /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. -static bool isKnownBaseResult(Value *v) { - if (!isa<PHINode>(v) && !isa<SelectInst>(v)) { +static bool isKnownBaseResult(Value *V) { + if (!isa<PHINode>(V) && !isa<SelectInst>(V)) { // no recursion possible return true; } - if (cast<Instruction>(v)->getMetadata("is_base_value")) { + if (isa<Instruction>(V) && + cast<Instruction>(V)->getMetadata("is_base_value")) { // This is a previously inserted base phi or select. We know // that this is a base value. return true; @@ -647,8 +580,7 @@ private: /// from. For gc objects, this is simply itself. On success, returns a value /// which is the base pointer. (This is reliable and can be used for /// relocation.) 
On failure, returns nullptr. -static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { Value *def = findBaseOrBDV(I, cache); if (isKnownBaseResult(def)) { @@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, done = true; // Since we're adding elements to 'states' as we run, we can't keep // iterators into the set. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // We want to keep naming deterministic in the loop that follows, so // sort the keys before iteration. This is useful in allowing us to // write stable tests. Note that there is no invalidation issue here. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (isa<PHINode>(v)) { int num_preds = std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); assert(num_preds > 0 && "how did we reach here"); PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - NewInsertedDefs.insert(phi); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, UndefValue *undef = UndefValue::get(sel->getType()); SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, undef, "base_select", sel); - NewInsertedDefs.insert(basesel); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { PHINode *phi = cast<PHINode>(v); unsigned NumPHIValues = phi->getNumIncomingValues(); @@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(states.count(base)); base = states[base].getBase(); assert(base != nullptr && "unknown PhiState!"); - assert(NewInsertedDefs.count(base) && - "should have already added this in a prev. iteration!"); } // In essense this assert states: the only way two @@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, if (base->getType() != basephi->getType()) { base = new BitCastInst(base, basephi->getType(), "cast", InBB->getTerminator()); - NewInsertedDefs.insert(base); } basephi->addIncoming(base, InBB); } @@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // The cast is needed since base traversal may strip away bitcasts if (base->getType() != basesel->getType()) { base = new BitCastInst(base, basesel->getType(), "cast", basesel); - NewInsertedDefs.insert(base); } basesel->setOperand(i, base); } @@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // post condition: PointerToBase contains one (derived, base) pair for every // pointer in live. 
Note that derived can be equal to base if the original // pointer was a base pointer. -static void findBasePointers(const StatepointLiveSetTy &live, - DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, - DominatorTree *DT, DefiningValueMapTy &DVCache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static void +findBasePointers(const StatepointLiveSetTy &live, + DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, + DominatorTree *DT, DefiningValueMapTy &DVCache) { // For the naming of values inserted to be deterministic - which makes for // much cleaner and more stable tests - we need to assign an order to the // live values. DenseSets do not provide a deterministic order across runs. - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.insert(Temp.end(), live.begin(), live.end()); std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *ptr : Temp) { - Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); + Value *base = findBasePointer(ptr, DVCache); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || @@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live, // If you see this trip and like to live really dangerously, the code should // be correct, just with idioms the verifier can't handle. You can try // disabling the verifier at your own substaintial risk. - assert(!isNullConstant(base) && "the relocation code needs adjustment to " - "handle the relocation of a null pointer " - "constant without causing false positives " - "in the safepoint ir verifier."); + assert(!isa<ConstantPointerNull>(base) && + "the relocation code needs adjustment to handle the relocation of " + "a null pointer constant without causing false positives in the " + "safepoint ir verifier."); } } @@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, const CallSite &CS, PartiallyConstructedSafepointRecord &result) { DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - DenseSet<llvm::Value *> NewInsertedDefs; - findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); + findBasePointers(result.liveset, PointerToBase, &DT, DVCache); if (PrintBasePointers) { // Note: Need to print these in a stable order since this is checked in // some tests. 
errs() << "Base Pairs (w/o Relocation):\n"; - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.reserve(PointerToBase.size()); for (auto Pair : PointerToBase) { Temp.push_back(Pair.first); @@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *Ptr : Temp) { Value *Base = PointerToBase[Ptr]; - errs() << " derived %" << Ptr->getName() << " base %" - << Base->getName() << "\n"; + errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() + << "\n"; } } result.PointerToBase = PointerToBase; - result.NewInsertedDefs = NewInsertedDefs; } -/// Check for liveness of items in the insert defs and add them to the live -/// and base pointer sets -static void fixupLiveness(DominatorTree &DT, const CallSite &CS, - const DenseSet<Value *> &allInsertedDefs, - PartiallyConstructedSafepointRecord &result) { - Instruction *inst = CS.getInstruction(); - - auto liveset = result.liveset; - auto PointerToBase = result.PointerToBase; +/// Given an updated version of the dataflow liveness results, update the +/// liveset and base pointer maps for the call site CS. +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, + const CallSite &CS, + PartiallyConstructedSafepointRecord &result); - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); }; - - // For each new definition, check to see if a) the definition dominates the - // instruction we're interested in, and b) one of the uses of that definition - // is edge-reachable from the instruction we're interested in. This is the - // same definition of liveness we used in the intial liveness analysis - for (Value *newDef : allInsertedDefs) { - if (liveset.count(newDef)) { - // already live, no action needed - continue; - } - - // PERF: Use DT to check instruction domination might not be good for - // compilation time, and we could change to optimal solution if this - // turn to be a issue - if (!DT.dominates(cast<Instruction>(newDef), inst)) { - // can't possibly be live at inst - continue; - } - - if (is_live_gc_reference(*newDef)) { - // Add the live new defs into liveset and PointerToBase - liveset.insert(newDef); - PointerToBase[newDef] = newDef; - } - } - - result.liveset = liveset; - result.PointerToBase = PointerToBase; -} - -static void fixupLiveReferences( - Function &F, DominatorTree &DT, Pass *P, - const DenseSet<llvm::Value *> &allInsertedDefs, - ArrayRef<CallSite> toUpdate, +static void recomputeLiveInValues( + Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + // TODO-PERF: reuse the original liveness, then simply run the dataflow + // again. The old values are still live and will help it stablize quickly. + GCPtrLivenessData RevisedLivenessData; + computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; const CallSite &CS = toUpdate[i]; - fixupLiveness(DT, CS, allInsertedDefs, info); + recomputeLiveInValues(RevisedLivenessData, CS, info); } } -// Normalize basic block to make it ready to be target of invoke statepoint. -// It means spliting it to have single predecessor. Return newly created BB -// ready to be successor of invoke statepoint. 
-static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB, - BasicBlock *InvokeParent, - Pass *P) { - BasicBlock *ret = BB; - +// When inserting gc.relocate calls, we need to ensure there are no uses +// of the original value between the gc.statepoint and the gc.relocate call. +// One case which can arise is a phi node at the start of one of the successor blocks. +// We also need to be able to insert the gc.relocates only on the path which +// goes through the statepoint. We might need to split an edge to make this +// possible. +static BasicBlock * +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { + DominatorTree *DT = nullptr; + if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DT = &DTP->getDomTree(); + + BasicBlock *Ret = BB; if (!BB->getUniquePredecessor()) { - ret = SplitBlockPredecessors(BB, InvokeParent, ""); + Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); } - // Another requirement for such basic blocks is to not have any phi nodes. - // Since we just ensured that new BB will have single predecessor, - // all phi nodes in it will have one value. Here it would be naturall place - // to - // remove them all. But we can not do this because we are risking to remove - // one of the values stored in liveset of another statepoint. We will do it - // later after placing all safepoints. + // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes + // from it + FoldSingleEntryPHINodes(Ret); + assert(!isa<PHINode>(Ret->begin())); - return ret; + // At this point, we can safely insert a gc.relocate as the first instruction + // in Ret if needed. + return Ret; } static int find_index(ArrayRef<Value *> livevec, Value *val) { @@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, // combination. This results is some blow up the function declarations in // the IR, but removes the need for argument bitcasts which shrinks the IR // greatly and makes it much more readable. - SmallVector<Type *, 1> types; // one per 'any' type + SmallVector<Type *, 1> types; // one per 'any' type types.push_back(liveVariables[i]->getType()); // result type Value *gc_relocate_decl = Intrinsic::getDeclaration( M, Intrinsic::experimental_gc_relocate, types); @@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ token = invoke; // Generate gc relocates in exceptional path - BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint( - toReplace->getUnwindDest(), invoke->getParent(), P); + BasicBlock *unwindBlock = toReplace->getUnwindDest(); + assert(!isa<PHINode>(unwindBlock->begin()) && + unwindBlock->getUniquePredecessor() && + "can't safely insert in this block!"); Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ exceptional_token, Builder); // Generate gc relocates and returns for normal block - BasicBlock *normalDest = normalizeBBForInvokeSafepoint( - toReplace->getNormalDest(), invoke->getParent(), P); + BasicBlock *normalDest = toReplace->getNormalDest(); + assert(!isa<PHINode>(normalDest->begin()) && + normalDest->getUniquePredecessor() && + "can't safely insert in this block!"); IP = &*(normalDest->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Take the name of the original value call if it had one.
token->takeName(CS.getInstruction()); - // The GCResult is already inserted, we just need to find it +// The GCResult is already inserted, we just need to find it #ifndef NDEBUG Instruction *toReplace = CS.getInstruction(); assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && @@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Second, create a gc.relocate for every live variable CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); - } namespace { @@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec, // Replace an existing gc.statepoint with a new one and a set of gc.relocates // which make the relocations happening at this safepoint explicit. -// +// // WARNING: Does not do any fixup to adjust users of the original live // values. That's the callers responsibility. static void @@ -1458,14 +1355,13 @@ static void relocationViaAlloca( Function &F, DominatorTree &DT, ArrayRef<Value *> live, ArrayRef<struct PartiallyConstructedSafepointRecord> records) { #ifndef NDEBUG - int initialAllocaNum = 0; - - // record initial number of allocas - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum++; - } + // record initial number of (static) allocas; we'll check we have the same + // number when we get done. + int InitialAllocaNum = 0; + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum++; #endif // TODO-PERF: change data structures, reserve @@ -1505,47 +1401,49 @@ static void relocationViaAlloca( // In case if it was invoke statepoint // we will insert stores for exceptional path gc relocates. if (isa<InvokeInst>(Statepoint)) { - insertRelocationStores(info.UnwindToken->users(), - allocaMap, visitedLiveValues); + insertRelocationStores(info.UnwindToken->users(), allocaMap, + visitedLiveValues); } -#ifndef NDEBUG - // As a debuging aid, pretend that an unrelocated pointer becomes null at - // the gc.statepoint. This will turn some subtle GC problems into slightly - // easier to debug SEGVs - SmallVector<AllocaInst *, 64> ToClobber; - for (auto Pair : allocaMap) { - Value *Def = Pair.first; - AllocaInst *Alloca = cast<AllocaInst>(Pair.second); - - // This value was relocated - if (visitedLiveValues.count(Def)) { - continue; + if (ClobberNonLive) { + // As a debugging aid, pretend that an unrelocated pointer becomes null at + // the gc.statepoint. This will turn some subtle GC problems into + // slightly easier to debug SEGVs. Note that on large IR files with + // lots of gc.statepoints this is extremely costly in both memory and + // time.
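The clobbering described just above is a fail-fast idea: poison every slot that liveness did not mark live at the statepoint, so a stale use crashes right away instead of corrupting the heap much later. The same idea in a self-contained sketch, with hypothetical Frame/Obj names that are not part of the pass:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct Obj { int Data; };

// Hypothetical frame: every GC-visible slot lives here.
using Frame = std::unordered_map<std::string, Obj *>;

// Null out every slot that liveness did not report as live; any later
// dereference of a stale slot now fails immediately and obviously.
void clobberNonLive(Frame &F, const std::unordered_set<std::string> &Live) {
  for (auto &Slot : F)
    if (!Live.count(Slot.first))
      Slot.second = nullptr;
}

int main() {
  Obj A{1}, B{2};
  Frame F{{"a", &A}, {"b", &B}};
  clobberNonLive(F, {"a"});
  std::printf("a=%d, b %s\n", F["a"]->Data,
              F["b"] ? "survived (bug in liveness?)" : "was poisoned");
  return 0;
}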
+ SmallVector<AllocaInst *, 64> ToClobber; + for (auto Pair : allocaMap) { + Value *Def = Pair.first; + AllocaInst *Alloca = cast<AllocaInst>(Pair.second); + + // This value was relocated + if (visitedLiveValues.count(Def)) { + continue; + } + ToClobber.push_back(Alloca); } - ToClobber.push_back(Alloca); - } - auto InsertClobbersAt = [&](Instruction *IP) { - for (auto *AI : ToClobber) { - auto AIType = cast<PointerType>(AI->getType()); - auto PT = cast<PointerType>(AIType->getElementType()); - Constant *CPN = ConstantPointerNull::get(PT); - StoreInst *store = new StoreInst(CPN, AI); - store->insertBefore(IP); - } - }; + auto InsertClobbersAt = [&](Instruction *IP) { + for (auto *AI : ToClobber) { + auto AIType = cast<PointerType>(AI->getType()); + auto PT = cast<PointerType>(AIType->getElementType()); + Constant *CPN = ConstantPointerNull::get(PT); + StoreInst *store = new StoreInst(CPN, AI); + store->insertBefore(IP); + } + }; - // Insert the clobbering stores. These may get intermixed with the - // gc.results and gc.relocates, but that's fine. - if (auto II = dyn_cast<InvokeInst>(Statepoint)) { - InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); - InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); - } else { - BasicBlock::iterator Next(cast<CallInst>(Statepoint)); - Next++; - InsertClobbersAt(Next); + // Insert the clobbering stores. These may get intermixed with the + // gc.results and gc.relocates, but that's fine. + if (auto II = dyn_cast<InvokeInst>(Statepoint)) { + InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); + InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); + } else { + BasicBlock::iterator Next(cast<CallInst>(Statepoint)); + Next++; + InsertClobbersAt(Next); + } } -#endif } // update use with load allocas and add store for gc_relocated for (auto Pair : allocaMap) { @@ -1603,11 +1501,11 @@ static void relocationViaAlloca( assert(!inst->isTerminator() && "The only TerminatorInst that can produce a value is " "InvokeInst which is handled above."); - store->insertAfter(inst); + store->insertAfter(inst); } } else { assert((isa<Argument>(def) || isa<GlobalVariable>(def) || - (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && + isa<ConstantPointerNull>(def)) && "Must be argument or global"); store->insertAfter(cast<Instruction>(alloca)); } @@ -1621,12 +1519,11 @@ static void relocationViaAlloca( } #ifndef NDEBUG - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum--; - } - assert(initialAllocaNum == 0 && "We must not introduce any extra allocas"); + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum--; + assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); #endif } @@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { } } -static Function *getUseHolder(Module &M) { - FunctionType *ftype = - FunctionType::get(Type::getVoidTy(M.getContext()), true); - Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype)); - return Func; -} - /// Insert holders so that each Value is obviously live through the entire -/// liftetime of the call. +/// lifetime of the call. 
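insertUseHolderAfter, immediately below, keeps values alive by handing them to a dummy vararg call (__tmp_use) that is stripped out later. Outside LLVM, the same keep-alive effect is often approximated with an empty inline-asm that claims to read its operand; a minimal sketch under that assumption (GCC/Clang-style asm, hypothetical holdLive name):

// A no-op "holder": the empty asm claims to read V, so the optimizer must
// keep every argument's value alive up to this point.
template <typename T> inline void holdLive(T &&V) {
  asm volatile("" : : "g"(V) : "memory");
}
template <typename T, typename... Ts>
inline void holdLive(T &&V, Ts &&... Rest) {
  holdLive(static_cast<T &&>(V));
  holdLive(static_cast<Ts &&>(Rest)...);
}

int main() {
  int A = 1, B = 2;
  int *P = &A;
  // Analogous in spirit to the dummy holder call: everything listed here is
  // observably used, so it cannot be dead-code-eliminated beforehand.
  holdLive(A, B, P);
  return 0;
}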
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values, - SmallVectorImpl<CallInst *> &holders) { + SmallVectorImpl<CallInst *> &Holders) { + if (Values.empty()) + // No values to hold live, might as well not insert the empty holder + return; + Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); - Function *Func = getUseHolder(*M); + // Use a dummy vararg function to actually hold the values live + Function *Func = cast<Function>(M->getOrInsertFunction( + "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); if (CS.isCall()) { // For call safepoints insert dummy calls right after safepoint - BasicBlock::iterator next(CS.getInstruction()); - next++; - CallInst *base_holder = CallInst::Create(Func, Values, "", next); - holders.push_back(base_holder); - } else if (CS.isInvoke()) { - // For invoke safepooints insert dummy calls both in normal and - // exceptional destination blocks - InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); - CallInst *normal_holder = CallInst::Create( - Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt()); - CallInst *unwind_holder = CallInst::Create( - Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt()); - holders.push_back(normal_holder); - holders.push_back(unwind_holder); - } else - llvm_unreachable("unsupported call type"); + BasicBlock::iterator Next(CS.getInstruction()); + Next++; + Holders.push_back(CallInst::Create(Func, Values, "", Next)); + return; + } + // For invoke safepoints insert dummy calls both in normal and + // exceptional destination blocks + auto *II = cast<InvokeInst>(CS.getInstruction()); + Holders.push_back(CallInst::Create( + Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); + Holders.push_back(CallInst::Create( + Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); } static void findLiveReferences( Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + GCPtrLivenessData OriginalLivenessData; + computeLiveInValues(DT, F, OriginalLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; const CallSite &CS = toUpdate[i]; - analyzeParsePointLiveness(DT, CS, info); + analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info); } } -static void addBasesAsLiveValues(StatepointLiveSetTy &liveset, - DenseMap<Value *, Value *> &PointerToBase) { - // Identify any base pointers which are used in this safepoint, but not - // themselves relocated. We need to relocate them so that later inserted - // safepoints can get the properly relocated base register. - DenseSet<Value *> missing; - for (Value *L : liveset) { - assert(PointerToBase.find(L) != PointerToBase.end()); - Value *base = PointerToBase[L]; - assert(base); - if (liveset.find(base) == liveset.end()) { - assert(PointerToBase.find(base) == PointerToBase.end()); - // uniqued by set insert - missing.insert(base); +/// Remove any vector of pointers from the liveset by scalarizing them over the +/// statepoint instruction. Adds the scalarized pieces to the liveset. It +/// would be preferable to include the vector in the statepoint itself, but +/// the lowering code currently does not handle that. Extending it would be +/// slightly non-trivial since it requires a format change. Given how rare +/// such cases are (for the moment?), scalarizing is an acceptable compromise.
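splitVectorValues, defined next, exists because statepoint lowering can relocate scalar pointers but not whole vectors of them. A conceptual sketch of why explode-relocate-reform is sound, with a hypothetical RelocMap standing in for the collector's object motion (none of these names come from the pass):

#include <cassert>
#include <unordered_map>
#include <vector>

struct Obj { int Id; };
using RelocMap = std::unordered_map<Obj *, Obj *>;

// The relocation machinery only knows how to update scalar pointers, which
// is exactly the constraint that motivates scalarization in the pass.
Obj *relocateScalar(Obj *P, const RelocMap &M) {
  auto It = M.find(P);
  return It == M.end() ? P : It->second;
}

std::vector<Obj *> safepoint(const std::vector<Obj *> &Vec, const RelocMap &M) {
  // Split the vector into scalars (extractelement in the pass) ...
  std::vector<Obj *> Elements(Vec.begin(), Vec.end());
  // ... relocate each scalar across the statepoint ...
  for (Obj *&E : Elements)
    E = relocateScalar(E, M);
  // ... and reform the vector afterwards (insertelement in the pass).
  return Elements;
}

int main() {
  Obj Old{7}, Moved{7};
  std::vector<Obj *> V{&Old};
  RelocMap M{{&Old, &Moved}};
  V = safepoint(V, M);
  assert(V[0] == &Moved && "vector element tracked through relocation");
  return 0;
}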
+static void splitVectorValues(Instruction *StatepointInst, + StatepointLiveSetTy &LiveSet, DominatorTree &DT) { + SmallVector<Value *, 16> ToSplit; + for (Value *V : LiveSet) + if (isa<VectorType>(V->getType())) + ToSplit.push_back(V); + + if (ToSplit.empty()) + return; + + Function &F = *(StatepointInst->getParent()->getParent()); + + DenseMap<Value *, AllocaInst *> AllocaMap; + // First is normal return, second is exceptional return (invoke only) + DenseMap<Value *, std::pair<Value *, Value *>> Replacements; + for (Value *V : ToSplit) { + LiveSet.erase(V); + + AllocaInst *Alloca = + new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); + AllocaMap[V] = Alloca; + + VectorType *VT = cast<VectorType>(V->getType()); + IRBuilder<> Builder(StatepointInst); + SmallVector<Value *, 16> Elements; + for (unsigned i = 0; i < VT->getNumElements(); i++) + Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); + LiveSet.insert(Elements.begin(), Elements.end()); + + auto InsertVectorReform = [&](Instruction *IP) { + Builder.SetInsertPoint(IP); + Builder.SetCurrentDebugLocation(IP->getDebugLoc()); + Value *ResultVec = UndefValue::get(VT); + for (unsigned i = 0; i < VT->getNumElements(); i++) + ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], + Builder.getInt32(i)); + return ResultVec; + }; + + if (isa<CallInst>(StatepointInst)) { + BasicBlock::iterator Next(StatepointInst); + Next++; + Instruction *IP = &*(Next); + Replacements[V].first = InsertVectorReform(IP); + Replacements[V].second = nullptr; + } else { + InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); + // We've already normalized - check that we don't have shared destination + // blocks + BasicBlock *NormalDest = Invoke->getNormalDest(); + assert(!isa<PHINode>(NormalDest->begin())); + BasicBlock *UnwindDest = Invoke->getUnwindDest(); + assert(!isa<PHINode>(UnwindDest->begin())); + // Insert insert element sequences in both successors + Instruction *IP = &*(NormalDest->getFirstInsertionPt()); + Replacements[V].first = InsertVectorReform(IP); + IP = &*(UnwindDest->getFirstInsertionPt()); + Replacements[V].second = InsertVectorReform(IP); } } + for (Value *V : ToSplit) { + AllocaInst *Alloca = AllocaMap[V]; + + // Capture all users before we start mutating use lists + SmallVector<Instruction *, 16> Users; + for (User *U : V->users()) + Users.push_back(cast<Instruction>(U)); + + for (Instruction *I : Users) { + if (auto Phi = dyn_cast<PHINode>(I)) { + for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) + if (V == Phi->getIncomingValue(i)) { + LoadInst *Load = new LoadInst( + Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); + Phi->setIncomingValue(i, Load); + } + } else { + LoadInst *Load = new LoadInst(Alloca, "", I); + I->replaceUsesOfWith(V, Load); + } + } - // Note that we want these at the end of the list, otherwise - // register placement gets screwed up once we lower to STATEPOINT - // instructions. This is an utter hack, but there doesn't seem to be a - // better one. 
- for (Value *base : missing) { - assert(base); - liveset.insert(base); - PointerToBase[base] = base; - } - assert(liveset.size() == PointerToBase.size()); + // Store the original value and the replacement value into the alloca + StoreInst *Store = new StoreInst(V, Alloca); + if (auto I = dyn_cast<Instruction>(V)) + Store->insertAfter(I); + else + Store->insertAfter(Alloca); + + // Normal return for invoke, or call return + Instruction *Replacement = cast<Instruction>(Replacements[V].first); + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + // Unwind return for invoke only + Replacement = cast_or_null<Instruction>(Replacements[V].second); + if (Replacement) + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + } + + // apply mem2reg to promote alloca to SSA + SmallVector<AllocaInst *, 16> Allocas; + for (Value *V : ToSplit) + Allocas.push_back(AllocaMap[V]); + PromoteMemToReg(Allocas, DT); } static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, @@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } #endif + // When inserting gc.relocates for invokes, we need to be able to insert at + // the top of the successor blocks. See the comment on + // normalizeForInvokeSafepoint for exactly what is needed. Note that this step + // may restructure the CFG. + for (CallSite CS : toUpdate) { + if (!CS.isInvoke()) + continue; + InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); + normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), + P); + normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), + P); + } + // A list of dummy calls added to the IR to keep various values obviously // live in the IR. We'll remove all of these when done. SmallVector<CallInst *, 64> holders; @@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, SmallVector<Value *, 64> DeoptValues; for (Use &U : StatepointCS.vm_state_args()) { Value *Arg = cast<Value>(&U); - if (isGCPointerType(Arg->getType())) + assert(!isUnhandledGCPointerType(Arg->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } insertUseHolderAfter(CS, DeoptValues, holders); @@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // site. findLiveReferences(F, DT, P, toUpdate, records); + // Do a limited scalarization of any live-at-safepoint vector values which + // contain pointers. This enables this pass to run after vectorization at + // the cost of some possible performance loss. TODO: it would be nice to + // natively support vectors all the way through the backend so we don't need + // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) { + struct PartiallyConstructedSafepointRecord &info = records[i]; + Instruction *statepoint = toUpdate[i].getInstruction(); + splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); + } + // B) Find the base pointers for each live pointer /* scope for caching */ { // Cache the 'defining value' relation used in the computation and @@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // gep a + 1 // safepoint 2 // br loop - DenseSet<llvm::Value *> allInsertedDefs; - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - allInsertedDefs.insert(info.NewInsertedDefs.begin(), - info.NewInsertedDefs.end()); - } - // We insert some dummy calls after each safepoint to definitely hold live // the base pointers which were identified for that safepoint. We'll then // ask liveness for _every_ base inserted to see what is now live. Then we @@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, insertUseHolderAfter(CS, Bases, holders); } - // Add the bases explicitly to the live vector set. This may result in a few - // extra relocations, but the base has to be available whenever a pointer - // derived from it is used. Thus, we need it to be part of the statepoint's - // gc arguments list. TODO: Introduce an explicit notion (in the following - // code) of the GC argument list as seperate from the live Values at a - // given statepoint. - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - addBasesAsLiveValues(info.liveset, info.PointerToBase); - } + // By selecting base pointers, we've effectively inserted new uses. Thus, we + // need to rerun liveness. We may *also* have inserted new defs, but that's + // not the key issue. + recomputeLiveInValues(F, DT, P, toUpdate, records); - // If we inserted any new values, we need to adjust our notion of what is - // live at a particular safepoint. - if (!allInsertedDefs.empty()) { - fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records); - } if (PrintBasePointers) { for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; @@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } toUpdate.clear(); // prevent accident use of invalid CallSites - // In case if we inserted relocates in a different basic block than the - // original safepoint (this can happen for invokes). We need to be sure that - // original values were not used in any of the phi nodes at the - // beginning of basic block containing them. Because we know that all such - // blocks will have single predecessor we can safely assume that all phi - // nodes have single entry (because of normalizeBBForInvokeSafepoint). - // Just remove them all here. 
-  for (size_t i = 0; i < records.size(); i++) {
-    Instruction *I = records[i].StatepointToken;
-
-    if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
-      FoldSingleEntryPHINodes(invoke->getNormalDest());
-      assert(!isa<PHINode>(invoke->getNormalDest()->begin()));
-
-      FoldSingleEntryPHINodes(invoke->getUnwindDest());
-      assert(!isa<PHINode>(invoke->getUnwindDest()->begin()));
-    }
-  }
-
   // Do all the fixups of the original live variables to their relocated selves
   SmallVector<Value *, 128> live;
   for (size_t i = 0; i < records.size(); i++) {
@@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
     Statepoint statepoint(info.StatepointToken);
     live.insert(live.end(), statepoint.gc_args_begin(),
                 statepoint.gc_args_end());
+#ifndef NDEBUG
+    // Do some basic sanity checks on our liveness results before performing
+    // relocation. Relocation can and will turn mistakes in liveness results
+    // into non-sensical code which is much harder to debug.
+    // TODO: It would be nice to test consistency as well
+    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+           "statepoint must be reachable or liveness is meaningless");
+    for (Value *V : statepoint.gc_args()) {
+      if (!isa<Instruction>(V))
+        // Non-instruction values trivially dominate all possible uses
+        continue;
+      auto LiveInst = cast<Instruction>(V);
+      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+             "unreachable values should never be live");
+      assert(DT.dominates(LiveInst, info.StatepointToken) &&
+             "basic SSA liveness expectation violated by liveness analysis");
+    }
+#endif
   }
   unique_unsorted(live);
@@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
   if (!shouldRewriteStatepointsIn(F))
     return false;
 
-  // Gather all the statepoints which need rewritten.
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  // Gather all the statepoints which need to be rewritten. Be careful to only
+  // consider those in reachable code since we need to ask dominance queries
+  // when rewriting. We'll delete the unreachable ones in a moment.
   SmallVector<CallSite, 64> ParsePointNeeded;
+  bool HasUnreachableStatepoint = false;
   for (Instruction &I : inst_range(F)) {
     // TODO: only the ones with the flag set!
-    if (isStatepoint(I))
-      ParsePointNeeded.push_back(CallSite(&I));
+    if (isStatepoint(I)) {
+      if (DT.isReachableFromEntry(I.getParent()))
+        ParsePointNeeded.push_back(CallSite(&I));
+      else
+        HasUnreachableStatepoint = true;
+    }
   }
 
+  bool MadeChange = false;
+
+  // Delete any unreachable statepoints so that we don't have unrewritten
+  // statepoints surviving this pass. This makes testing easier and the
+  // resulting IR less confusing to human readers. Rather than be fancy, we
+  // just reuse a utility function which removes the unreachable blocks.
+  if (HasUnreachableStatepoint)
+    MadeChange |= removeUnreachableBlocks(F);
+
   // Return early if no work to do.
   if (ParsePointNeeded.empty())
-    return false;
+    return MadeChange;
+
+  // As a prepass, go ahead and aggressively destroy single-entry phi nodes.
+  // These are created by LCSSA. They have the effect of increasing the size
+  // of liveness sets for no good reason. It may be harder to do this
+  // post-insertion since relocations and base phis can confuse things; a
+  // small example follows.
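For illustration only (block and value names hypothetical), the kind of LCSSA phi this prepass folds away:

    // loop.exit:                                      ; preds = %loop (unique predecessor)
    //   %v.lcssa = phi i64 addrspace(1)* [ %v, %loop ]

FoldSingleEntryPHINodes replaces every use of %v.lcssa with %v and erases the phi, so the liveness computation later tracks one value instead of two.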
+  for (BasicBlock &BB : F)
+    if (BB.getUniquePredecessor()) {
+      MadeChange = true;
+      FoldSingleEntryPHINodes(&BB);
+    }
 
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return insertParsePoints(F, DT, this, ParsePointNeeded);
+  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+  return MadeChange;
+}
+
+// Liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness; the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Is this value a constant consisting entirely of null values?
+static bool isConstantNull(Value *V) {
+  return isa<Constant>(V) && cast<Constant>(V)->isNullValue();
+}
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block.
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+                                BasicBlock::reverse_iterator rend,
+                                DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+    Instruction *I = &*ritr;
+
+    // KILL/Def - Remove this definition from LiveIn
+    LiveTmp.erase(I);
+
+    // Don't consider *uses* in PHI nodes; we handle their contribution to
+    // predecessor blocks when we seed the LiveOut sets
+    if (isa<PHINode>(I))
+      continue;
+
+    // USE - Add to the LiveIn set for this instruction
+    for (Value *V : I->operands()) {
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here. Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock *Succ : successors(BB)) {
+    const BasicBlock::iterator E(Succ->getFirstNonPHI());
+    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+      PHINode *Phi = cast<PHINode>(&*I);
+      Value *V = Phi->getIncomingValueForBlock(BB);
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here. Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+  DenseSet<Value *> KillSet;
+  for (Instruction &I : *BB)
+    if (isHandledGCPointerType(I.getType()))
+      KillSet.insert(&I);
+  return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+                          TerminatorInst *TI, bool TermOkay = false) {
+  for (Value *V : Live) {
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      // The terminator can be a member of the LiveOut set. LLVM's definition
+      // of instruction dominance states that V does not dominate itself. As
+      // such, we need to special case this to allow it.
+      if (TermOkay && TI == I)
+        continue;
+      assert(DT.dominates(I, TI) &&
+             "basic SSA liveness expectation violated by liveness analysis");
+    }
+  }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+                          BasicBlock &BB) {
+  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+                                GCPtrLivenessData &Data) {
+
+  SmallSetVector<BasicBlock *, 200> Worklist;
+  auto AddPredsToWorklist = [&](BasicBlock *BB) {
+    // We use a SetVector so that we don't have duplicates in the worklist.
+    Worklist.insert(pred_begin(BB), pred_end(BB));
+  };
+  auto NextItem = [&]() {
+    BasicBlock *BB = Worklist.back();
+    Worklist.pop_back();
+    return BB;
+  };
+
+  // Seed the liveness for each individual block
+  for (BasicBlock &BB : F) {
+    Data.KillSet[&BB] = computeKillSet(&BB);
+    Data.LiveSet[&BB].clear();
+    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+    for (Value *Kill : Data.KillSet[&BB])
+      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+    Data.LiveOut[&BB] = DenseSet<Value *>();
+    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+    Data.LiveIn[&BB] = Data.LiveSet[&BB];
+    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+    if (!Data.LiveIn[&BB].empty())
+      AddPredsToWorklist(&BB);
+  }
+
+  // Propagate that liveness until stable
+  while (!Worklist.empty()) {
+    BasicBlock *BB = NextItem();
+
+    // Compute our new liveout set, then exit early if it hasn't changed
+    // despite the contribution of our successor.
+    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+    const auto OldLiveOutSize = LiveOut.size();
+    for (BasicBlock *Succ : successors(BB)) {
+      assert(Data.LiveIn.count(Succ));
+      set_union(LiveOut, Data.LiveIn[Succ]);
+    }
+    // assert: OldLiveOut is a subset of LiveOut
+    if (OldLiveOutSize == LiveOut.size()) {
+      // If the sets are the same size, then we didn't actually add anything
+      // when unioning our successors' LiveIn. Thus, the LiveIn of this block
+      // hasn't changed.
+      continue;
+    }
+    Data.LiveOut[BB] = LiveOut;
+
+    // Apply the effects of this basic block
+    DenseSet<Value *> LiveTmp = LiveOut;
+    set_union(LiveTmp, Data.LiveSet[BB]);
+    set_subtract(LiveTmp, Data.KillSet[BB]);
+
+    assert(Data.LiveIn.count(BB));
+    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+    // assert: OldLiveIn is a subset of LiveTmp
+    if (OldLiveIn.size() != LiveTmp.size()) {
+      Data.LiveIn[BB] = LiveTmp;
+      AddPredsToWorklist(BB);
+    }
+  } // while (!Worklist.empty())
+
+#ifndef NDEBUG
+  // Sanity check our output against SSA properties. This helps catch any
+  // missing kills during the above iteration.
+  for (BasicBlock &BB : F) {
+    checkBasicSSA(DT, Data, BB);
+  }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+                              StatepointLiveSetTy &Out) {
+
+  BasicBlock *BB = Inst->getParent();
+
+  // Note: The copy is intentional and required
+  assert(Data.LiveOut.count(BB));
+  DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+  // We want to handle the statepoint itself oddly. Its
+  // call result is not live (normal), nor are its arguments
+  // (unless they're used again later).
+  // This adjustment is
+  // specifically what we need to relocate
+  BasicBlock::reverse_iterator rend(Inst);
+  computeLiveInValues(BB->rbegin(), rend, LiveOut);
+  LiveOut.erase(Inst);
+  Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const CallSite &CS,
+                                  PartiallyConstructedSafepointRecord &Info) {
+  Instruction *Inst = CS.getInstruction();
+  StatepointLiveSetTy Updated;
+  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+  DenseSet<Value *> Bases;
+  for (auto KVPair : Info.PointerToBase) {
+    Bases.insert(KVPair.second);
+  }
+#endif
+  // We may have base pointers which are now live that weren't before. We need
+  // to update the PointerToBase structure to reflect this.
+  for (auto V : Updated)
+    if (!Info.PointerToBase.count(V)) {
+      assert(Bases.count(V) && "can't find base for unexpected live value");
+      Info.PointerToBase[V] = V;
+      continue;
+    }
+
+#ifndef NDEBUG
+  for (auto V : Updated) {
+    assert(Info.PointerToBase.count(V) &&
+           "must be able to find base for live value");
+  }
+#endif
+
+  // Remove any stale base mappings - this can happen since our liveness is
+  // more precise than the one inherent in the base pointer analysis
+  DenseSet<Value *> ToErase;
+  for (auto KVPair : Info.PointerToBase)
+    if (!Updated.count(KVPair.first))
+      ToErase.insert(KVPair.first);
+  for (auto V : ToErase)
+    Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+  for (auto KVPair : Info.PointerToBase)
+    assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+  Info.liveset = Updated;
+}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 875a007..bc068f7 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
   Constant *Ptr = Operands[0];
   auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
-  markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+  markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+                                                  Indices));
 }
 
 void SCCPSolver::visitStoreInst(StoreInst &SI) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 06b000f..59dc528 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1166,10 +1166,9 @@ public:
       } else {
         continue;
       }
-      Instruction *DbgVal =
-          DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
-                                      DIExpression(DVI->getExpression()), Inst);
-      DbgVal->setDebugLoc(DVI->getDebugLoc());
+      DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+                                  DIExpression(DVI->getExpression()),
+                                  DVI->getDebugLoc(), Inst);
     }
   }
 };
@@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
   if (Indices.size() == 1 &&
       cast<ConstantInt>(Indices.back())->isZero())
     return BasePtr;
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+                               NamePrefix + "sroa_idx");
 }
 
 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
     OffsetPtr = Int8PtrOffset == 0 ?
Int8Ptr - : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), + : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, + IRB.getInt(Int8PtrOffset), NamePrefix + "sroa_raw_idx"); } Ptr = OffsetPtr; @@ -3250,7 +3251,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *GEP = + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3283,7 +3285,7 @@ private: // Extract the single value and store it using the indices. Value *Store = IRB.CreateStore( IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), - IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep")); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); } @@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Create a piece expression describing the new partition or reuse AI's // expression if there is only one partition. DIExpression PieceExpr = Expr; - if (IsSplit || Expr.isBitPiece()) { + if (IsSplit || Expr->isBitPiece()) { // If this alloca is already a scalar replacement of a larger aggregate, // Piece.Offset describes the offset inside the scalar. - uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0; + uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0; uint64_t Start = Offset + Piece.Offset; uint64_t Size = Piece.Size; - if (Expr.isBitPiece()) { - uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize(); + if (Expr->isBitPiece()) { + uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize(); if (Start >= AbsEnd) // No need to describe a SROAed padding. continue; @@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca)) OldDDI->eraseFromParent(); - auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI); - NewDDI->setDebugLoc(DbgDecl->getDebugLoc()); + DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(), + &AI); } } return Changed; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 3e7cf04..f99fe3f 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) { /// \returns The profiled weight of I. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) { DebugLoc DLoc = Inst.getDebugLoc(); - if (DLoc.isUnknown()) + if (!DLoc) return 0; unsigned Lineno = DLoc.getLine(); if (Lineno < HeaderLineno) return 0; - DILocation DIL(DLoc.getAsMDNode(*Ctx)); + DILocation DIL = DLoc.get(); int LOffset = Lineno - HeaderLineno; - unsigned Discriminator = DIL.getDiscriminator(); + unsigned Discriminator = DIL->getDiscriminator(); unsigned Weight = Samples->samplesAt(LOffset, Discriminator); DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst << " (line offset: " << LOffset << "." << Discriminator @@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { /// \returns the line number where \p F is defined. 
If it returns 0, /// it means that there is no debug information available for \p F. unsigned SampleProfileLoader::getFunctionLoc(Function &F) { - DISubprogram S = getDISubprogram(&F); - if (S.isSubprogram()) - return S.getLineNumber(); + if (MDSubprogram *S = getDISubprogram(&F)) + return S->getLine(); // If could not find the start of \p F, emit a diagnostic to inform the user // about the missed opportunity. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 6cc8411..42095ae 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); initializeMergedLoadStoreMotionPass(Registry); + initializeNaryReassociatePass(Registry); initializePartiallyInlineLibCallsPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); @@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); + initializeFloat2IntPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index acd8585..693c5ae 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1117,10 +1117,9 @@ public: } else { continue; } - Instruction *DbgVal = DIB->insertDbgValueIntrinsic( - Arg, 0, DIVariable(DVI->getVariable()), - DIExpression(DVI->getExpression()), Inst); - DbgVal->setDebugLoc(DVI->getDebugLoc()); + DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), + DIExpression(DVI->getExpression()), + DVI->getDebugLoc(), Inst); } } }; @@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, // split the alloca again later. unsigned AS = AI->getType()->getAddressSpace(); Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); - V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index a457cba..d55dc6a 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) { CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0"); } if (I != 0) - CV[I] = Builder.CreateConstGEP1_32(CV[0], I, + CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I, V->getName() + ".i" + Twine(I)); } else { // Search through a chain of InsertElementInsts looking for element I. diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 1a04d74..8af4753 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( } } // Create an ugly GEP with a single index for each index. - ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); } } // Create a GEP with the constant offset index. 
if (AccumulativeByteOffset != 0) { Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); - ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index e71031c..2fc9368 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -15,42 +15,46 @@ // // There are many optimizations we can perform in the domain of SLSR. This file // for now contains only an initial step. Specifically, we look for strength -// reduction candidates in two forms: +// reduction candidates in the following forms: // -// Form 1: (B + i) * S -// Form 2: &B[i * S] +// Form 1: B + i * S +// Form 2: (B + i) * S +// Form 3: &B[i * S] // // where S is an integer variable, and i is a constant integer. If we found two -// candidates +// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2 +// in a simpler way with respect to S1. For example, // -// S1: X = (B + i) * S -// S2: Y = (B + i') * S +// S1: X = B + i * S +// S2: Y = B + i' * S => X + (i' - i) * S // -// or +// S1: X = (B + i) * S +// S2: Y = (B + i') * S => X + (i' - i) * S // // S1: X = &B[i * S] -// S2: Y = &B[i' * S] -// -// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with +// S2: Y = &B[i' * S] => &X[(i' - i) * S] // -// Y = X + (i' - i) * S +// Note: (i' - i) * S is folded to the extent possible. // -// or +// This rewriting is in general a good idea. The code patterns we focus on +// usually come from loop unrolling, so (i' - i) * S is likely the same +// across iterations and can be reused. When that happens, the optimized form +// takes only one add starting from the second iteration. // -// Y = &X[(i' - i) * S] -// -// where (i' - i) * S is folded to the extent possible. When S2 has multiple -// bases, we pick the one that is closest to S2, or S2's "immediate" basis. +// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has +// multiple bases, we choose to rewrite S2 with respect to its "immediate" +// basis, the basis that is the closest ancestor in the dominator tree. // // TODO: // -// - Handle candidates in the form of B + i * S -// // - Floating point arithmetics when fast math is enabled. // // - SLSR may decrease ILP at the architecture level. Targets that are very // sensitive to ILP may want to disable it. Having SLSR to consider ILP is // left as future work. +// +// - When (i' - i) is constant but i and i' are not, we could still perform +// SLSR. #include <vector> #include "llvm/ADT/DenseSet.h" @@ -72,13 +76,12 @@ namespace { class StraightLineStrengthReduce : public FunctionPass { public: - // SLSR candidate. Such a candidate must be in the form of - // (Base + Index) * Stride - // or - // Base[..][Index * Stride][..] + // SLSR candidate. Such a candidate must be in one of the forms described in + // the header comments. struct Candidate : public ilist_node<Candidate> { enum Kind { Invalid, // reserved for the default constructor + Add, // B + i * S Mul, // (B + i) * S GEP, // &B[..][i * S][..] }; @@ -92,14 +95,14 @@ public: Basis(nullptr) {} Kind CandidateKind; const SCEV *Base; - // Note that Index and Stride of a GEP candidate may not have the same - // integer type. 
In that case, during rewriting, Stride will be + // Note that Index and Stride of a GEP candidate do not necessarily have the + // same integer type. In that case, during rewriting, Stride will be // sign-extended or truncated to Index's type. ConstantInt *Index; Value *Stride; // The instruction this candidate corresponds to. It helps us to rewrite a // candidate with respect to its immediate basis. Note that one instruction - // can corresponds to multiple candidates depending on how you associate the + // can correspond to multiple candidates depending on how you associate the // expression. For instance, // // (a + 1) * (b + 2) @@ -143,31 +146,43 @@ private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they // share the same base and stride. bool isBasisFor(const Candidate &Basis, const Candidate &C); + // Returns whether the candidate can be folded into an addressing mode. + bool isFoldable(const Candidate &C, TargetTransformInfo *TTI, + const DataLayout *DL); + // Returns true if C is already in a simplest form and not worth being + // rewritten. + bool isSimplestForm(const Candidate &C); // Checks whether I is in a candidate form. If so, adds all the matching forms // to Candidates, and tries to find the immediate basis for each of them. - void allocateCandidateAndFindBasis(Instruction *I); + void allocateCandidatesAndFindBasis(Instruction *I); + // Allocate candidates and find bases for Add instructions. + void allocateCandidatesAndFindBasisForAdd(Instruction *I); + // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a + // candidate. + void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for Mul instructions. - void allocateCandidateAndFindBasisForMul(Instruction *I); + void allocateCandidatesAndFindBasisForMul(Instruction *I); // Splits LHS into Base + Index and, if succeeds, calls - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for GetElementPtr instructions. - void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); + void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP); // A helper function that scales Idx with ElementSize before invoking - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, - Value *S, uint64_t ElementSize, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, + Value *S, uint64_t ElementSize, + Instruction *I); // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate // basis. - void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, - ConstantInt *Idx, Value *S, - Instruction *I); + void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B, + ConstantInt *Idx, Value *S, + Instruction *I); // Rewrites candidate C with respect to Basis. void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); // A helper function that factors ArrayIdx to a product of a stride and a - // constant index, and invokes allocateCandidateAndFindBasis with the + // constant index, and invokes allocateCandidatesAndFindBasis with the // factorings. 
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP); @@ -187,7 +202,7 @@ private: // Temporarily holds all instructions that are unlinked (but not deleted) by // rewriteCandidateWithBasis. These instructions will be actually removed // after all rewriting finishes. - DenseSet<Instruction *> UnlinkedInstructions; + std::vector<Instruction *> UnlinkedInstructions; }; } // anonymous namespace @@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, Basis.CandidateKind == C.CandidateKind); } -static bool isCompletelyFoldable(GetElementPtrInst *GEP, - const TargetTransformInfo *TTI, - const DataLayout *DL) { +static bool isGEPFoldable(GetElementPtrInst *GEP, + const TargetTransformInfo *TTI, + const DataLayout *DL) { GlobalVariable *BaseGV = nullptr; int64_t BaseOffset = 0; bool HasBaseReg = false; @@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP, BaseOffset, HasBaseReg, Scale); } -// TODO: We currently implement an algorithm whose time complexity is linear to -// the number of existing candidates. However, a better algorithm exists. We -// could depth-first search the dominator tree, and maintain a hash table that -// contains all candidates that dominate the node being traversed. This hash -// table is indexed by the base and the stride of a candidate. Therefore, -// finding the immediate basis of a candidate boils down to one hash-table look -// up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis( - Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, - Instruction *I) { - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If &B[Idx * S] fits into an addressing mode, do not turn it into - // non-free computation. - if (isCompletelyFoldable(GEP, TTI, DL)) - return; +// Returns whether (Base + Index * Stride) can be folded to an addressing mode. +static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, + TargetTransformInfo *TTI) { + return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true, + Index->getSExtValue()); +} + +bool StraightLineStrengthReduce::isFoldable(const Candidate &C, + TargetTransformInfo *TTI, + const DataLayout *DL) { + if (C.CandidateKind == Candidate::Add) + return isAddFoldable(C.Base, C.Index, C.Stride, TTI); + if (C.CandidateKind == Candidate::GEP) + return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL); + return false; +} + +// Returns true if GEP has zero or one non-zero index. +static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) { + unsigned NumNonZeroIndices = 0; + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { + ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); + if (ConstIdx == nullptr || !ConstIdx->isZero()) + ++NumNonZeroIndices; + } + return NumNonZeroIndices <= 1; +} + +bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) { + if (C.CandidateKind == Candidate::Add) { + // B + 1 * S or B + (-1) * S + return C.Index->isOne() || C.Index->isMinusOne(); + } + if (C.CandidateKind == Candidate::Mul) { + // (B + 0) * S + return C.Index->isZero(); + } + if (C.CandidateKind == Candidate::GEP) { + // (char*)B + S or (char*)B - S + return ((C.Index->isOne() || C.Index->isMinusOne()) && + hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins))); } + return false; +} +// TODO: We currently implement an algorithm whose time complexity is linear in +// the number of existing candidates. However, we could do better by using +// ScopedHashTable. 
+// Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table look up.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+    Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+    Instruction *I) {
   Candidate C(CT, B, Idx, S, I);
-  // Try to compute the immediate basis of C.
-  unsigned NumIterations = 0;
-  // Limit the scan radius to avoid running forever.
-  static const unsigned MaxNumIterations = 50;
-  for (auto Basis = Candidates.rbegin();
-       Basis != Candidates.rend() && NumIterations < MaxNumIterations;
-       ++Basis, ++NumIterations) {
-    if (isBasisFor(*Basis, C)) {
-      C.Basis = &(*Basis);
-      break;
+  // SLSR can complicate an instruction in two cases:
+  //
+  // 1. If we can fold I into an addressing mode, computing I is likely free or
+  // takes only one instruction.
+  //
+  // 2. I is already in its simplest form. For example, when
+  //      X = B + 8 * S
+  //      Y = B + S,
+  //    rewriting Y to X - 7 * S is probably a bad idea.
+  //
+  // In the above cases, we still add I to the candidate list so that I can be
+  // the basis of other candidates, but we leave I's basis blank so that I
+  // won't be rewritten.
+  if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+    // Try to compute the immediate basis of C.
+    unsigned NumIterations = 0;
+    // Limit the scan radius to avoid running in quadratic time.
+    static const unsigned MaxNumIterations = 50;
+    for (auto Basis = Candidates.rbegin();
+         Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+         ++Basis, ++NumIterations) {
+      if (isBasisFor(*Basis, C)) {
+        C.Basis = &(*Basis);
+        break;
+      }
     }
   }
   // Regardless of whether we find a basis for C, we need to push C to the
-  // candidate list.
+  // candidate list so that it can be the basis of other candidates.
   Candidates.push_back(C);
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+    Instruction *I) {
   switch (I->getOpcode()) {
+  case Instruction::Add:
+    allocateCandidatesAndFindBasisForAdd(I);
+    break;
   case Instruction::Mul:
-    allocateCandidateAndFindBasisForMul(I);
+    allocateCandidatesAndFindBasisForMul(I);
     break;
   case Instruction::GetElementPtr:
-    allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+    allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
     break;
   }
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+    Instruction *I) {
+  // Try matching B + i * S; a worked example follows.
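A worked example of the Add form (illustrative IR; value names are hypothetical):

    // %s8 = mul i64 %s, 8
    // %x  = add i64 %b, %s8        ; candidate (Add, B = %b, i = 8, S = %s)
    // %s9 = mul i64 %s, 9
    // %y  = add i64 %b, %s9        ; candidate (Add, B = %b, i = 9, S = %s)

If %x dominates %y, then %x is a basis for %y, and %y can be rewritten as %y = add i64 %x, %s, i.e. X + (9 - 8) * S.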
+ if (!isa<IntegerType>(I->getType())) + return; + + assert(I->getNumOperands() == 2 && "isn't I an add?"); + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + allocateCandidatesAndFindBasisForAdd(LHS, RHS, I); + if (LHS != RHS) + allocateCandidatesAndFindBasisForAdd(RHS, LHS, I); +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( + Value *LHS, Value *RHS, Instruction *I) { + Value *S = nullptr; + ConstantInt *Idx = nullptr; + if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + Idx * S + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx) + APInt One(Idx->getBitWidth(), 1); + Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue()); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else { + // At least, I = LHS + 1 * RHS + ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS, + I); + } +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Value *LHS, Value *RHS, Instruction *I) { Value *B = nullptr; ConstantInt *Idx = nullptr; @@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { // If LHS is in the form of "Base + Index", then I is in the form of // "(Base + Index) * RHS". - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); } else { // Otherwise, at least try the form (LHS + 0) * RHS. ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Instruction *I) { // Try matching (B + i) * S. // TODO: we could extend SLSR to float and vector types. if (!isa<IntegerType>(I->getType())) return; + assert(I->getNumOperands() == 2 && "isn't I a mul?"); Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); - allocateCandidateAndFindBasisForMul(LHS, RHS, I); + allocateCandidatesAndFindBasisForMul(LHS, RHS, I); if (LHS != RHS) { // Symmetrically, try to split RHS to Base + Index. - allocateCandidateAndFindBasisForMul(RHS, LHS, I); + allocateCandidatesAndFindBasisForMul(RHS, LHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, Instruction *I) { - // I = B + sext(Idx *nsw S) *nsw ElementSize + // I = B + sext(Idx *nsw S) * ElementSize + // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. 
   IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
   ConstantInt *ScaledIdx = ConstantInt::get(
       IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
-  allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+  allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
 }
 
 void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
                                                   const SCEV *Base,
                                                   uint64_t ElementSize,
                                                   GetElementPtrInst *GEP) {
-  // At least, ArrayIdx = ArrayIdx *s 1.
-  allocateCandidateAndFindBasisForGEP(
+  // At least, ArrayIdx = ArrayIdx *nsw 1.
+  allocateCandidatesAndFindBasisForGEP(
       Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
       ArrayIdx, ElementSize, GEP);
   Value *LHS = nullptr;
   ConstantInt *RHS = nullptr;
-  // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4).
-  //
   // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
   // itself. This would allow us to handle the shl case for free. However,
   // matching SCEVs has two issues:
@@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
   // sext'ed multiplication.
   if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
     // SLSR is currently unsafe if i * S may overflow.
-    // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
-    allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+    // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+    allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+    // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+    //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+    APInt One(RHS->getBitWidth(), 1);
+    ConstantInt *PowerOf2 =
+        ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+    allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
   }
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
     GetElementPtrInst *GEP) {
   // TODO: handle vector GEPs
   if (GEP->getType()->isVectorTy())
@@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
     else
       BumpWithUglyGEP = true;
   }
+
   // Compute Bump = C - Basis = (i' - i) * S.
   // Common case 1: if (i' - i) is 1, Bump = S.
   if (IndexOffset.getSExtValue() == 1)
@@ -443,9 +556,24 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
   // Common case 2: if (i' - i) is -1, Bump = -S.
   if (IndexOffset.getSExtValue() == -1)
     return Builder.CreateNeg(C.Stride);
-  // Otherwise, Bump = (i' - i) * sext/trunc(S).
-  ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset);
-  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType());
+
+  // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+  // have different bit widths.
+  IntegerType *DeltaType =
+      IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+  if (IndexOffset.isPowerOf2()) {
+    // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+    ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+    return Builder.CreateShl(ExtendedStride, Exponent);
+  }
+  if ((-IndexOffset).isPowerOf2()) {
+    // If (i - i') is a power of 2, Bump = -(sext/trunc(S) << log(i - i')).
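For concreteness, a small worked example of these two power-of-two cases (illustrative values only):

    // e.g. i = 1, i' = 5: IndexOffset = 4,  so Bump = S << 2
    //      i = 5, i' = 1: IndexOffset = -4, so Bump = -(S << 2)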
+ ConstantInt *Exponent = + ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); + return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); + } + Constant *Delta = ConstantInt::get(DeltaType, IndexOffset); return Builder.CreateMul(ExtendedStride, Delta); } @@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && C.Stride == Basis.Stride); + // We run rewriteCandidateWithBasis on all candidates in a post-order, so the + // basis of a candidate cannot be unlinked before the candidate. + assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked"); // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as @@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { + case Candidate::Add: case Candidate::Mul: - Reduced = Builder.CreateAdd(Basis.Ins, Bump); + if (BinaryOperator::isNeg(Bump)) { + Reduced = + Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump)); + } else { + Reduced = Builder.CreateAdd(Basis.Ins, Bump); + } break; case Candidate::GEP: { Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); if (BumpWithUglyGEP) { // C = (char *)Basis + Bump unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - // We only considered inbounds GEP as candidates. - Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); + if (InBounds) + Reduced = + Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); + else + Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump); Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); } else { // C = gep Basis, Bump // Canonicalize bump to pointer size. Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); + if (InBounds) + Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump); + else + Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump); } } break; @@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( // Unlink C.Ins so that we can skip other candidates also corresponding to // C.Ins. The actual deletion is postponed to the end of runOnFunction. C.Ins->removeFromParent(); - UnlinkedInstructions.insert(C.Ins); + UnlinkedInstructions.push_back(C.Ins); } bool StraightLineStrengthReduce::runOnFunction(Function &F) { @@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { for (auto &I : *node->getBlock()) - allocateCandidateAndFindBasis(&I); + allocateCandidatesAndFindBasis(&I); } // Rewrite candidates in the reverse depth-first order. This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 6c3ce58..4f23e20 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() { /// no longer dominate all their uses. 
Not sure if this is really nessasary void StructurizeCFG::rebuildSSA() { SSAUpdater Updater; - for (const auto &BB : ParentRegion->blocks()) + for (auto *BB : ParentRegion->blocks()) for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index 820544b..c1cd39a 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *B = I; TerminatorInst *Last = B->getTerminator(); - DebugLoc LastLoc = Last->getDebugLoc(); - if (LastLoc.isUnknown()) continue; - DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + DILocation LastDIL = Last->getDebugLoc().get(); + if (!LastDIL) + continue; for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { BasicBlock *Succ = Last->getSuccessor(I); Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - DebugLoc FirstLoc = First->getDebugLoc(); - if (FirstLoc.isUnknown()) continue; - DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + DILocation FirstDIL = First->getDebugLoc().get(); + if (!FirstDIL) + continue; // If the first instruction (First) of Succ is at the same file // location as B's last instruction (Last), add a new // discriminator for First's location and all the instructions // in Succ that share the same location with First. - if (FirstDIL.atSameLineAs(LastDIL)) { + if (!FirstDIL->canDiscriminate(*LastDIL)) { // Create a new lexical scope and compute a new discriminator // number for it. - StringRef Filename = FirstDIL.getFilename(); - DIScope Scope = FirstDIL.getScope(); - DIFile File = Builder.createFile(Filename, Scope.getDirectory()); - unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); + StringRef Filename = FirstDIL->getFilename(); + auto *Scope = FirstDIL->getScope(); + DIFile File = Builder.createFile(Filename, Scope->getDirectory()); + + // FIXME: Calculate the discriminator here, based on local information, + // and delete MDLocation::computeNewDiscriminator(). The current + // solution gives different results depending on other modules in the + // same context. All we really need is to discriminate between + // FirstDIL and LastDIL -- a local map would suffice. + unsigned Discriminator = FirstDIL->computeNewDiscriminator(); DILexicalBlockFile NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator); - DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); - DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + auto *NewDIL = + MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), + NewScope, FirstDIL->getInlinedAt()); + DebugLoc newDebugLoc = NewDIL; // Attach this new debug location to First and every // instruction following First that shares the same location. 
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; ++I1) { - if (I1->getDebugLoc() != FirstLoc) break; + if (I1->getDebugLoc().get() != FirstDIL) + break; I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() - << ":" << NewDIL.getColumnNumber() << ":" - << NewDIL.getDiscriminator() << *I1 << "\n"); + DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() + << ":" << NewDIL->getColumn() << ":" + << NewDIL->getDiscriminator() << *I1 << "\n"); } DEBUG(dbgs() << "\n"); Changed = true; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f04ea9c..f200b58 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -157,20 +157,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Find the MDNode which corresponds to the DISubprogram data that described F. static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { for (DISubprogram Subprogram : Finder.subprograms()) { - if (Subprogram.describes(F)) return Subprogram; + if (Subprogram->describes(F)) + return Subprogram; } return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the // back of the operand list. -static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) { +static void AddOperand(DICompileUnit CU, MDSubprogramArray SPs, Metadata *NewSP) { SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs->getNumOperands() + 1); - for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I) - NewSPs.push_back(SPs->getOperand(I)); + NewSPs.reserve(SPs.size() + 1); + for (auto *SP : SPs) + NewSPs.push_back(SP); NewSPs.push_back(NewSP); - CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs))); + CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs)); } // Clone the module-level debug info associated with OldFunc. The cloned data @@ -186,15 +187,15 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, // Ensure that OldFunc appears in the map. // (if it's already there it must point to NewFunc anyway) VMap[OldFunc] = NewFunc; - DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap)); + DISubprogram NewSubprogram = + cast<MDSubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); for (DICompileUnit CU : Finder.compile_units()) { - DIArray Subprograms(CU.getSubprograms()); - + auto Subprograms = CU->getSubprograms(); // If the compile unit's function list contains the old function, it should // also contain the new one. - for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { - if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + for (auto *SP : Subprograms) { + if (SP == OldSubprogramMDNode) { AddOperand(CU, Subprograms, NewSubprogram); break; } @@ -395,7 +396,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Action == CloningDirector::CloneSuccessors) { // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. 
- const TerminatorInst *TI = BB->getTerminator(); + const TerminatorInst *TI = NewBB->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) ToClone.push_back(TI->getSuccessor(i)); return; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 52e2d59..44b7d25 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::Stored; - } else if (ImmutableCallSite C = I) { + } else if (auto C = ImmutableCallSite(I)) { if (!C.isCallee(&U)) return true; GS.IsLoaded = true; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index df3e1d4..a08ffbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -835,11 +835,10 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, DenseMap<const MDLocation *, MDLocation *> &IANodes) { SmallVector<MDLocation*, 3> InlinedAtLocations; MDLocation *Last = InlinedAtNode; - DebugLoc CurInlinedAt = DL; + MDLocation *CurInlinedAt = DL; // Gather all the inlined-at nodes - while (MDLocation *IA = - cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) { + while (MDLocation *IA = CurInlinedAt->getInlinedAt()) { // Skip any we've already built nodes for if (MDLocation *Found = IANodes[IA]) { Last = Found; @@ -847,7 +846,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, } InlinedAtLocations.push_back(IA); - CurInlinedAt = DebugLoc::getFromDILocation(IA); + CurInlinedAt = IA; } // Starting from the top, rebuild the nodes to point to the new inlined-at @@ -862,7 +861,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, // And finally create the normal location for this instruction, referring to // the new inlined-at chain. - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } /// Update inlined instructions' line numbers to @@ -870,11 +869,11 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); - if (TheCallDL.isUnknown()) + if (!TheCallDL) return; auto &Ctx = Fn->getContext(); - auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx)); + MDLocation *InlinedAtNode = TheCallDL; // Create a unique call site, not to be confused with any other call from the // same location. @@ -891,7 +890,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) { + if (!DL) { // If the inlined instruction has no line number, make it look as if it // originates from the call location. 
This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -905,19 +904,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI->setDebugLoc(TheCallDL); } else { BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, MetadataAsValue::get( - Ctx, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx))); - } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DDI->setOperand(1, MetadataAsValue::get( - Ctx, createInlinedVariable(DDI->getVariable(), - InlinedAt, Ctx))); - } } } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bd15f9e..1c9760e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. Argument *ExtendedArg = nullptr; @@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) - DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI); + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); else - DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, - DIExpr, SI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); return true; } @@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, LI)) return true; - Instruction *DbgVal = - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), LI); return true; } @@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. 
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bd15f9e..1c9760e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

   if (LdStHasDebugValue(DIVar, SI))
     return true;

-  Instruction *DbgVal = nullptr;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
   Argument *ExtendedArg = nullptr;
@@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
   if (ExtendedArg)
-    DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   else
-    DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
-                                             DIExpr, SI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   return true;
 }

@@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            LoadInst *LI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

   if (LdStHasDebugValue(DIVar, LI))
     return true;

-  Instruction *DbgVal =
-      Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+  Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+                                  DDI->getDebugLoc(), LI);
   return true;
 }

@@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
         // This is a call by-value or some other instruction that
         // takes a pointer to the variable. Insert a *value*
         // intrinsic that describes the alloca.
-        auto DbgVal = DIB.insertDbgValueIntrinsic(
-            AI, 0, DIVariable(DDI->getVariable()),
-            DIExpression(DDI->getExpression()), CI);
-        DbgVal->setDebugLoc(DDI->getDebugLoc());
+        DIB.insertDbgValueIntrinsic(AI, 0, DIVariable(DDI->getVariable()),
+                                    DIExpression(DDI->getExpression()),
+                                    DDI->getDebugLoc(), CI);
       }
       DDI->eraseFromParent();
     }
@@ -1112,10 +1105,8 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
   if (!DDI)
     return false;
   DebugLoc Loc = DDI->getDebugLoc();
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

@@ -1127,16 +1118,14 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
     SmallVector<uint64_t, 4> NewDIExpr;
     NewDIExpr.push_back(dwarf::DW_OP_deref);
     if (DIExpr)
-      for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
-        NewDIExpr.push_back(DIExpr.getElement(i));
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
     DIExpr = Builder.createExpression(NewDIExpr);
   }

   // Insert llvm.dbg.declare in the same basic block as the original alloca,
   // and remove old llvm.dbg.declare.
   BasicBlock *BB = AI->getParent();
-  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB)
-      ->setDebugLoc(Loc);
+  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
   DDI->eraseFromParent();
   return true;
 }
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6b3aa02..1dbce47 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
 /// execute without exiting the loop.
 ///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
+/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
 /// The LoopInfo Analysis that is passed will be kept consistent.
 ///
 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
 /// available from the Pass it must also preserve those analyses.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
-                      bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
-                      Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+                      bool AllowRuntime, bool AllowExpensiveTripCount,
+                      unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+                      LPPassManager *LPM, AssumptionCache *AC) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   // flag is specified.
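The new doc comment describes the prologue that runtime unrolling needs. In source terms, unrolling a runtime-trip-count loop by Count = 4 produces the shape below; this is an illustrative C++ rendering of what UnrollLoop emits as IR, not LLVM code:

    #include <cstddef>

    void saxpy(float a, const float *x, float *y, size_t n) {
      size_t prologue = n % 4; // "RuntimeTripCount % Count" from the comment
      size_t i = 0;
      for (; i < prologue; ++i) // prologue: peel off the remainder iterations
        y[i] += a * x[i];
      for (; i < n; i += 4) {   // main loop: trip count now divisible by 4
        y[i] += a * x[i];
        y[i + 1] += a * x[i + 1];
        y[i + 2] += a * x[i + 2];
        y[i + 3] += a * x[i + 3];
      }
    }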
   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

-  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+  if (RuntimeTripCount &&
+      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
     return false;

   // Notify ScalarEvolution that the loop will be substantially changed,
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 381d8fc..d1774df 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
 /// ...
 /// End:
 ///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                    LPPassManager *LPM) {
   // for now, only unroll loops that contain a single exit
   if (!L->getExitingBlock())
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   if (isa<SCEVCouldNotCompute>(TripCountSC))
     return false;

+  BasicBlock *Header = L->getHeader();
+  const DataLayout &DL = Header->getModule()->getDataLayout();
+  SCEVExpander Expander(*SE, DL, "loop-unroll");
+  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+    return false;
+
   // We only handle cases when the unroll factor is a power of 2.
   // Count is the loop unroll factor, the number of extra copies added + 1.
   if (!isPowerOf2_32(Count))
     return false;

   // This constraint lets us deal with an overflowing trip count easily; see the
-  // comment on ModVal below. This check is equivalent to `Log2(Count) <
-  // BEWidth`.
-  if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+  // comment on ModVal below.
+  if (Log2_32(Count) > BEWidth)
     return false;

   // If this loop is nested, then the loop unroller changes the code in
@@ -333,18 +339,15 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

   BasicBlock *PH = L->getLoopPreheader();
-  BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   // It helps to splits the original preheader twice, one for the end of the
   // prolog code and one for a new loop preheader
   BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
   BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
   BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
-  const DataLayout &DL = Header->getModule()->getDataLayout();

   // Compute the number of extra iterations required, which is:
   //  extra iterations = run-time trip count % (loop unroll factor + 1)
-  SCEVExpander Expander(*SE, DL, "loop-unroll");
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
   Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
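The rewritten overflow guard is the same test in log space: for a power-of-two Count, `Count > (1 << BEWidth)` and `Log2(Count) > BEWidth` agree, but the shift form is undefined once BEWidth reaches 64. A small standalone check of that equivalence (log2u32 stands in for llvm::Log2_32; values are illustrative):

    #include <cassert>
    #include <cstdint>

    // Floor log2 of a nonzero 32-bit value, as an illustration of Log2_32.
    static unsigned log2u32(uint32_t V) {
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      uint32_t Count = 8;   // power-of-two unroll factor
      uint64_t BEWidth = 2; // width of the backedge-taken count
      // Both forms reject: 8 > 4, and log2(8) = 3 > 2.
      assert((Count > (1ULL << BEWidth)) == (log2u32(Count) > BEWidth));
      // The log form stays defined for BEWidth >= 64 (e.g. an i128 count),
      // where 1ULL << BEWidth would be undefined behavior.
      return 0;
    }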
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..014574d 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"

 using namespace llvm;

@@ -93,3 +94,13 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
   }
   return GV;
 }
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+  if (isa<Function>(FuncOrBitcast))
+    return cast<Function>(FuncOrBitcast);
+  FuncOrBitcast->dump();
+  std::string Err;
+  raw_string_ostream Stream(Err);
+  Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+  report_fatal_error(Err);
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 4b34b19..54e1733 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -872,8 +872,10 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
   }

   SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
-  SmallPtrSet<DomTreeNode *, 32> Visited;
   SmallVector<DomTreeNode *, 32> Worklist;
+  SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+  SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
   while (!PQ.empty()) {
     DomTreeNodePair RootPair = PQ.top();
     PQ.pop();
@@ -887,6 +889,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,

     Worklist.clear();
     Worklist.push_back(Root);
+    VisitedWorklist.insert(Root);

     while (!Worklist.empty()) {
       DomTreeNode *Node = Worklist.pop_back_val();
@@ -905,7 +908,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
         if (SuccLevel > RootLevel)
           continue;

-        if (!Visited.insert(SuccNode).second)
+        if (!VisitedPQ.insert(SuccNode).second)
           continue;

         BasicBlock *SuccBB = SuccNode->getBlock();
@@ -919,7 +922,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,

       for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
            ++CI) {
-        if (!Visited.count(*CI))
+        if (VisitedWorklist.insert(*CI).second)
           Worklist.push_back(*CI);
       }
     }
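The mem2reg hunk splits one shared Visited set into VisitedPQ (guarding dominance-frontier candidates) and VisitedWorklist (guarding the dominator-tree walk), and switches the push site to an insert-and-test. A generic sketch of that worklist idiom, with nothing LLVM-specific (Node and walk are hypothetical):

    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> Children;
    };

    // A dedicated visited set per traversal: each node is pushed at most once,
    // and marking a node here cannot suppress it in some other traversal that
    // keeps its own set -- the separation the hunk above introduces.
    static void walk(Node *Root) {
      std::set<Node *> VisitedWalk;
      std::vector<Node *> Worklist{Root};
      VisitedWalk.insert(Root);
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        for (Node *C : N->Children)
          if (VisitedWalk.insert(C).second) // true only on first insertion
            Worklist.push_back(C);
      }
    }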
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c7c0ca6..7c239cb 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1502,7 +1502,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
     if (isa<DbgInfoIntrinsic>(I))
       continue;

-    // Only speculatively execution a single instruction (not counting the
+    // Only speculatively execute a single instruction (not counting the
     // terminator) for now.
     ++SpeculationCost;
     if (SpeculationCost > 1)
@@ -3884,8 +3884,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
                                     "switch.tableidx.zext");

     Value *GEPIndices[] = { Builder.getInt32(0), Index };
-    Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
-                                           "switch.gep");
+    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+                                           GEPIndices, "switch.gep");
     return Builder.CreateLoad(GEP, "switch.load");
   }
 }
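This hunk and the SimplifyLibCalls hunks that follow are part of one API migration: GEP creation now takes the source element type explicitly instead of deriving it from the pointer operand (groundwork for opaque pointers). A minimal sketch of the two builder forms, assuming the LLVM 3.7-era IRBuilder where both overloads coexist (helper names are illustrative; Str is assumed to be an i8* value):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Both helpers compute &Str[1]; only the second states the element type.
    static Value *gepOld(IRBuilder<> &B, Value *Str) {
      return B.CreateGEP(Str, B.getInt64(1));                // type inferred
    }
    static Value *gepNew(IRBuilder<> &B, Value *Str) {
      return B.CreateGEP(B.getInt8Ty(), Str, B.getInt64(1)); // type explicit
    }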
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5867d65..42102e7 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -222,7 +222,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
   // Now that we have the destination's length, we must index into the
   // destination's pointer to get the actual memcpy destination (end of
   // the string .. we're concatenating).
-  Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+  Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");

   // We have enough information to now generate the memcpy call to do the
   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -303,7 +303,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
-      return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+      return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
     return nullptr;
   }

@@ -316,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // strchr(s+n,c)  -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
 }

 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -351,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // strrchr(s+n,c) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
 }

 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -476,7 +476,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }

   // See if we can get the length of the input string.
@@ -487,7 +487,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Type *PT = FT->getParamType(0);
   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
   Value *DstEnd =
-      B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+      B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));

   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -597,7 +597,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
     if (I == StringRef::npos) // No match.
       return Constant::getNullValue(CI->getType());

-    return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+    return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
   }

   // strpbrk(s, "a") -> strchr(s, 'a')
@@ -828,7 +828,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // memchr(s+n,c,l) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "memchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
 }

 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
@@ -1671,7 +1671,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
     Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
     B.CreateStore(V, Ptr);
-    Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+    Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
     B.CreateStore(B.getInt8(0), Ptr);

     return ConstantInt::get(CI->getType(), 1);
@@ -2276,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
   if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }

   // If a) we don't have any length information, or b) we know this will
@@ -2284,25 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
-    Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
-    return Ret;
-  } else if (!OnlyLowerUnknownSize) {
-    // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0)
-      return nullptr;
+  if (isFortifiedCallFoldable(CI, 2, 1, true))
+    return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));

-    Type *SizeTTy = DL.getIntPtrType(CI->getContext());
-    Value *LenV = ConstantInt::get(SizeTTy, Len);
-    Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
-    // If the function was an __stpcpy_chk, and we were able to fold it into
-    // a __memcpy_chk, we still need to return the correct end pointer.
-    if (Ret && Func == LibFunc::stpcpy_chk)
-      return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
-    return Ret;
-  }
-  return nullptr;
+  if (OnlyLowerUnknownSize)
+    return nullptr;
+
+  // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
+  uint64_t Len = GetStringLength(Src);
+  if (Len == 0)
+    return nullptr;
+
+  Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+  Value *LenV = ConstantInt::get(SizeTTy, Len);
+  Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+  // If the function was an __stpcpy_chk, and we were able to fold it into
+  // a __memcpy_chk, we still need to return the correct end pointer.
+  if (Ret && Func == LibFunc::stpcpy_chk)
+    return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+  return Ret;
 }

 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2322,8 +2322,18 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
 }

 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
-  if (CI->isNoBuiltin())
-    return nullptr;
+  // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+  // Some clang users checked for _chk libcall availability using:
+  //   __has_builtin(__builtin___memcpy_chk)
+  // When compiling with -fno-builtin, this is always true.
+  // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+  // end up with fortified libcalls, which isn't acceptable in a freestanding
+  // environment which only provides their non-fortified counterparts.
+  //
+  // Until we change clang and/or teach external users to check for availability
+  // differently, disregard the "nobuiltin" attribute and TLI::has.
+  //
+  // PR23093.

   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
@@ -2332,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // First, check that this is a known library functions.
-  if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+  if (!TLI->getLibFunc(FuncName, Func))
     return nullptr;

   // We never change the calling convention.
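In source terms, the __st[rp]cpy_chk folding above turns a fortified call with a known-constant source into a plain copy of a known length. A hedged C illustration of the before/after (the fortified builtins are compiler-provided; the exact lowering varies by target and flags):

    #include <string.h>

    char dst[16];

    /* What a front end emits for a fortified strcpy of a known string... */
    char *before(void) {
      return __builtin___strcpy_chk(dst, "hi", __builtin_object_size(dst, 0));
    }

    /* ...and roughly what the simplifier folds it to: the length (2 chars
     * plus the nul, 3 bytes) is a compile-time constant, so the runtime
     * size check can become a fixed-size copy. */
    char *after(void) {
      memcpy(dst, "hi", 3);
      return dst;
    }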
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7d0ae4..8986932 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -93,6 +93,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <map>
 #include <tuple>
@@ -503,8 +504,7 @@ static std::string getDebugLocString(const Loop *L) {
   std::string Result;
   if (L) {
     raw_string_ostream OS(Result);
-    const DebugLoc LoopDbgLoc = L->getStartLoc();
-    if (!LoopDbgLoc.isUnknown())
+    if (const DebugLoc LoopDbgLoc = L->getStartLoc())
       LoopDbgLoc.print(OS);
     else
       // Just print the module name.
@@ -686,7 +686,7 @@ public:
       Index = B.CreateNeg(Index);
     else if (!StepValue->isOne())
       Index = B.CreateMul(Index, StepValue);
-    return B.CreateGEP(StartValue, Index);
+    return B.CreateGEP(nullptr, StartValue, Index);

   case IK_NoInduction:
     return nullptr;
@@ -1839,7 +1839,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {

     for (unsigned Part = 0; Part < UF; ++Part) {
       // Calculate the pointer for the specific unroll-part.
-      Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+      Value *PartPtr =
+          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

       if (Reverse) {
         // If we store to reverse consecutive memory locations then we need
@@ -1847,8 +1848,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         StoredVal[Part] = reverseVector(StoredVal[Part]);
         // If the address is consecutive but reversed, then the
         // wide store needs to start at the last vector element.
-        PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-        PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+        PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+        PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
         Mask[Part] = reverseVector(Mask[Part]);
       }

@@ -1871,13 +1872,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   setDebugLocFromInst(Builder, LI);
   for (unsigned Part = 0; Part < UF; ++Part) {
     // Calculate the pointer for the specific unroll-part.
-    Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+    Value *PartPtr =
+        Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

     if (Reverse) {
       // If the address is consecutive but reversed, then the
       // wide load needs to start at the last vector element.
-      PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-      PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+      PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+      PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
       Mask[Part] = reverseVector(Mask[Part]);
     }

@@ -4007,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory())
     return false;

+  if (LAI->hasStoreToLoopInvariantAddress()) {
+    emitAnalysis(
+        VectorizationReport()
+        << "write to a loop invariant address could not be vectorized");
+    DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+    return false;
+  }
+
   if (LAI->getNumRuntimePointerChecks() >
       VectorizerParams::RuntimeMemoryCheckThreshold) {
     emitAnalysis(VectorizationReport()
@@ -4307,32 +4317,31 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
   }
 }

-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
-                                               ConstantInt *&StepValue) {
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+                          ConstantInt *&StepValue) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer inductions variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
-    return IK_NoInduction;
+    return false;

   // Check that the PHI is consecutive.
   const SCEV *PhiScev = SE->getSCEV(Phi);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
   if (!AR) {
     DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
-    return IK_NoInduction;
+    return false;
   }

   const SCEV *Step = AR->getStepRecurrence(*SE);
   // Calculate the pointer stride and check if it is consecutive.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
   if (!C)
-    return IK_NoInduction;
+    return false;

   ConstantInt *CV = C->getValue();
   if (PhiTy->isIntegerTy()) {
     StepValue = CV;
-    return IK_IntInduction;
+    return true;
   }

   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
@@ -4340,14 +4349,28 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
   // The pointer stride cannot be determined if the pointer element type is not
   // sized.
   if (!PointerElementType->isSized())
-    return IK_NoInduction;
+    return false;

   const DataLayout &DL = Phi->getModule()->getDataLayout();
   int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
-    return IK_NoInduction;
+    return false;
   StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+  return true;
+}
+
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+                                               ConstantInt *&StepValue) {
+  if (!isInductionPHI(Phi, SE, StepValue))
+    return IK_NoInduction;
+
+  Type *PhiTy = Phi->getType();
+  // Found an Integer induction variable.
+  if (PhiTy->isIntegerTy())
+    return IK_IntInduction;
+  // Found an Pointer induction variable.
   return IK_PtrInduction;
 }

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fc4cc1..7267f58 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1183,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
     case Instruction::ICmp:
     case Instruction::FCmp: {
       // Check that all of the compares have the same predicate.
-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
       for (unsigned i = 1, e = VL.size(); i < e; ++i) {
         CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -2202,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       if (Value *V = alreadyVectorized(E->Scalars))
         return V;

-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Value *V;
       if (Opcode == Instruction::FCmp)
         V = Builder.CreateFCmp(P0, L, R);
@@ -3101,9 +3101,7 @@ struct SLPVectorizer : public FunctionPass {
     // delete instructions.

     // Scan the blocks in the function in post order.
-    for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
-         e = po_end(&F.getEntryBlock()); it != e; ++it) {
-      BasicBlock *BB = *it;
+    for (auto BB : post_order(&F.getEntryBlock())) {
       // Vectorize trees that end at stores.
       if (unsigned count = collectStores(BB, R)) {
         (void)count;
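The last hunk swaps a manual po_begin/po_end iterator pair for the post_order() range adaptor from llvm/ADT/PostOrderIterator.h. A minimal sketch of the adaptor in use (standalone helper with an illustrative name):

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Count the blocks reachable from the entry, visiting them in post order,
    // with the same range-for shape as the SLPVectorizer hunk above.
    static unsigned countBlocksPostOrder(Function &F) {
      unsigned N = 0;
      for (BasicBlock *BB : post_order(&F.getEntryBlock())) {
        (void)BB; // each reachable block is visited exactly once
        ++N;
      }
      return N;
    }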