Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp   |    4
-rw-r--r--  lib/Transforms/Scalar/Android.mk                      |    2
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt                  |    2
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp        |    8
-rw-r--r--  lib/Transforms/Scalar/Float2Int.cpp                   |  540
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                         |    6
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp              |   64
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp                 |    2
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp          |    4
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp              |    5
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp             |    2
-rw-r--r--  lib/Transforms/Scalar/NaryReassociate.cpp             |  252
-rw-r--r--  lib/Transforms/Scalar/PlaceSafepoints.cpp             |    5
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp     | 1237
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                        |    3
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                        |   30
-rw-r--r--  lib/Transforms/Scalar/SampleProfile.cpp               |   11
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp                      |    2
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp        |    9
-rw-r--r--  lib/Transforms/Scalar/Scalarizer.cpp                  |    2
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  |    6
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  |  338
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp              |    2
23 files changed, 1841 insertions(+), 695 deletions(-)
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 5aa2b97..8918909 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTreeWrapperPass>();
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index cf30f39..16f2ead 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \
DeadStoreElimination.cpp \
EarlyCSE.cpp \
FlattenCFGPass.cpp \
+ Float2Int.cpp \
GVN.cpp \
IndVarSimplify.cpp \
InductiveRangeCheckElimination.cpp \
@@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \
LowerExpectIntrinsic.cpp \
MemCpyOptimizer.cpp \
MergedLoadStoreMotion.cpp \
+ NaryReassociate.cpp \
PartiallyInlineLibCalls.cpp \
PlaceSafepoints.cpp \
Reassociate.cpp \
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index d12fdb7..c29fcc3 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts
DeadStoreElimination.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
+ Float2Int.cpp
GVN.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
@@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
LowerExpectIntrinsic.cpp
MemCpyOptimizer.cpp
MergedLoadStoreMotion.cpp
+ NaryReassociate.cpp
PartiallyInlineLibCalls.cpp
PlaceSafepoints.cpp
Reassociate.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cb8981b..01952cf 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
return true;
}
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
if (TLI && TLI->has(LibFunc::strcpy) &&
F->getName() == TLI->getName(LibFunc::strcpy)) {
@@ -262,7 +262,7 @@ static bool isRemovable(Instruction *I) {
}
}
- if (CallSite CS = I)
+ if (auto CS = CallSite(I))
return CS.getInstruction()->use_empty();
return false;
@@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
}
- CallSite CS = I;
+ CallSite CS(I);
// All the supported functions so far happen to have dest as their first
// argument.
return CS.getArgument(0);
@@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (CallSite CS = cast<Value>(BBI)) {
+ if (auto CS = CallSite(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(BBI, TLI))
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
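+//
+// For example (a hypothetical illustration), a function such as
+//   bool lessThan(unsigned char A, unsigned char B) {
+//     return static_cast<float>(A) < static_cast<float>(B);
+//   }
+// is a candidate: both operands reach the fcmp root through integer-to-FP
+// conversions of narrow integers, so the whole def-use graph is mappable and
+// the comparison can be rewritten to operate on integers directly.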
+
+/// The largest integer type worth dealing with.
+static cl::opt<unsigned>
+MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
+ cl::desc("Max integer bitwidth to consider in float2int"
+ "(default=64)"));
+
+namespace {
+ struct Float2Int : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Float2Int() : FunctionPass(ID) {
+ initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
+ ConstantRange seen(Instruction *I, ConstantRange R);
+ ConstantRange badRange();
+ ConstantRange unknownRange();
+ ConstantRange validateRange(ConstantRange R);
+ void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
+ void walkForwards();
+ bool validateAndTransform();
+ Value *convert(Instruction *I, Type *ToTy);
+ void cleanup();
+
+ MapVector<Instruction*, ConstantRange > SeenInsts;
+ SmallPtrSet<Instruction*,8> Roots;
+ EquivalenceClasses<Instruction*> ECs;
+ MapVector<Instruction*, Value*> ConvertedInsts;
+ LLVMContext *Ctx;
+ };
+}
+
+char Float2Int::ID = 0;
+INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+
+// Given a FCmp predicate, return a matching ICmp predicate if one
+// exists, otherwise return BAD_ICMP_PREDICATE.
+static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ return CmpInst::ICMP_EQ;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ return CmpInst::ICMP_SGE;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ return CmpInst::ICMP_SLE;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ return CmpInst::ICMP_NE;
+ default:
+ return CmpInst::BAD_ICMP_PREDICATE;
+ }
+}
+
+// Given a floating point binary operator, return the matching
+// integer version.
+static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case Instruction::FAdd: return Instruction::Add;
+ case Instruction::FSub: return Instruction::Sub;
+ case Instruction::FMul: return Instruction::Mul;
+ }
+}
+
+// Find the roots - instructions that convert from the FP domain to
+// integer domain.
+void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+ for (auto &I : inst_range(F)) {
+ switch (I.getOpcode()) {
+ default: break;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Roots.insert(&I);
+ break;
+ case Instruction::FCmp:
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ CmpInst::BAD_ICMP_PREDICATE)
+ Roots.insert(&I);
+ break;
+ }
+ }
+}
+
+// Helper - mark I as having been traversed, having range R.
+ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+ DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
+ if (SeenInsts.find(I) != SeenInsts.end())
+ SeenInsts.find(I)->second = R;
+ else
+ SeenInsts.insert(std::make_pair(I, R));
+ return R;
+}
+
+// Helper - get a range representing a poison value.
+ConstantRange Float2Int::badRange() {
+ return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+ return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+ if (R.getBitWidth() > MaxIntegerBW + 1)
+ return badRange();
+ return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+// - walkBackwards: A breadth-first walk of the use-def graph starting from
+// the roots. Populate "SeenInsts" with interesting
+// instructions and poison values if they're obvious and
+// cheap to compute. Calculate the equivalence set structure
+// while we're here too.
+// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+// defs before their uses. Calculate the real range info.
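+//
+// As a hypothetical illustration, for an expression like
+//   (float)X + 1.0f < 100.0f
+// walkBackwards records the fcmp root first, then the fadd, then the
+// integer-to-FP conversion; walkForwards visits them in reverse, so the
+// conversion's range is already known by the time the fadd's range is
+// computed.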
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+ std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ if (SeenInsts.find(I) != SeenInsts.end())
+ // Seen already.
+ continue;
+
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ // Path terminated uncleanly.
+ seen(I, badRange());
+ break;
+
+ case Instruction::UIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+ APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(Min, Max)));
+ continue;
+ }
+
+ case Instruction::SIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+ APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(SMin, SMax)));
+ continue;
+ }
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FCmp:
+ seen(I, unknownRange());
+ break;
+ }
+
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ // Unify def-use chains if they interfere.
+ ECs.unionSets(I, OI);
+ if (SeenInsts.find(I)->second != badRange())
+ Worklist.push_back(OI);
+ } else if (!isa<ConstantFP>(O)) {
+ // Not an instruction or ConstantFP? we can't do anything.
+ seen(I, badRange());
+ }
+ }
+ }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+ for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+ if (It->second != unknownRange())
+ continue;
+
+ Instruction *I = It->first;
+ std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkForwards!");
+
+ case Instruction::FAdd:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FAdd is a binary operator!");
+ return Ops[0].add(Ops[1]);
+ };
+ break;
+
+ case Instruction::FSub:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FSub is a binary operator!");
+ return Ops[0].sub(Ops[1]);
+ };
+ break;
+
+ case Instruction::FMul:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FMul is a binary operator!");
+ return Ops[0].multiply(Ops[1]);
+ };
+ break;
+
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
+ return Ops[0];
+ };
+ break;
+
+ case Instruction::FCmp:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FCmp is a binary operator!");
+ return Ops[0].unionWith(Ops[1]);
+ };
+ break;
+ }
+
+ bool Abort = false;
+ SmallVector<ConstantRange,4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ assert(SeenInsts.find(OI) != SeenInsts.end() &&
+ "def not seen before use!");
+ OpRanges.push_back(SeenInsts.find(OI)->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // the exactness can be too precise. For example, negative zero can
+ // never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
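+ // For instance (hypothetical values): 1.5 rounds to 2.0 and is rejected,
+ // 42.0 rounds to itself and is accepted, while -0.0 is caught by the
+ // explicit negative-zero check below rather than by the rounding comparison.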
+ APFloat F = CF->getValueAPF();
+
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros())) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
+
+ // Reduce the operands' ranges to a single range and return.
+ if (!Abort)
+ seen(I, Op(OpRanges));
+ }
+}
+
+// If there is a valid transform to be done, do it.
+bool Float2Int::validateAndTransform() {
+ bool MadeChange = false;
+
+ // Iterate over every disjoint partition of the def-use graph.
+ for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
+ ConstantRange R(MaxIntegerBW + 1, false);
+ bool Fail = false;
+ Type *ConvertedToTy = nullptr;
+
+ // For every member of the partition, union all the ranges together.
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI) {
+ Instruction *I = *MI;
+ auto SeenI = SeenInsts.find(I);
+ if (SeenI == SeenInsts.end())
+ continue;
+
+ R = R.unionWith(SeenI->second);
+ // We need to ensure I has no users that have not been seen.
+ // If it does, transformation would be illegal.
+ //
+ // Don't count the roots, as they terminate the graphs.
+ if (Roots.count(I) == 0) {
+ // Set the type of the conversion while we're here.
+ if (!ConvertedToTy)
+ ConvertedToTy = I->getType();
+ for (User *U : I->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
+ DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
+ Fail = true;
+ break;
+ }
+ }
+ }
+ if (Fail)
+ break;
+ }
+
+ // If the set was empty, or we failed, or the range is poisonous,
+ // bail out.
+ if (ECs.member_begin(It) == ECs.member_end() || Fail ||
+ R.isFullSet() || R.isSignWrappedSet())
+ continue;
+ assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
+
+ // The number of bits required is the maximum of the upper and
+ // lower limits, plus one so it can be signed.
+ unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
+ R.getUpper().getMinSignedBits()) + 1;
+ DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
+
+ // If we've run outside the realm of exactly representable integers,
+ // the floating point result will differ from an integer approximation.
+
+ // Do we need more bits than are in the mantissa of the type we converted
+ // to? semanticsPrecision returns the number of mantissa bits plus one
+ // for the sign bit.
+ unsigned MaxRepresentableBits
+ = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
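+ // For instance, IEEE single precision has a semanticsPrecision of 24 and
+ // IEEE double has 53, so the limits computed here are 23 and 52 bits
+ // respectively.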
+ if (MinBW > MaxRepresentableBits) {
+ DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
+ continue;
+ }
+ if (MinBW > 64) {
+ DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
+ continue;
+ }
+
+ // OK, R is known to be representable. Now pick a type for it.
+ // FIXME: Pick the smallest legal type that will fit.
+ Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI)
+ convert(*MI, Ty);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+Value *Float2Int::convert(Instruction *I, Type *ToTy) {
+ if (ConvertedInsts.find(I) != ConvertedInsts.end())
+ // Already converted this instruction.
+ return ConvertedInsts[I];
+
+ SmallVector<Value*,4> NewOperands;
+ for (Value *V : I->operands()) {
+ // Don't recurse if we're an instruction that terminates the path.
+ if (I->getOpcode() == Instruction::UIToFP ||
+ I->getOpcode() == Instruction::SIToFP) {
+ NewOperands.push_back(V);
+ } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ NewOperands.push_back(convert(VI, ToTy));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Val,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ NewOperands.push_back(ConstantInt::get(ToTy, Val));
+ } else {
+ llvm_unreachable("Unhandled operand type?");
+ }
+ }
+
+ // Now create a new instruction.
+ IRBuilder<> IRB(I);
+ Value *NewV = nullptr;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction!");
+
+ case Instruction::FPToUI:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FPToSI:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FCmp: {
+ CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate());
+ assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
+ NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName());
+ break;
+ }
+
+ case Instruction::UIToFP:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::SIToFP:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()),
+ NewOperands[0], NewOperands[1],
+ I->getName());
+ break;
+ }
+
+ // If we're a root instruction, RAUW.
+ if (Roots.count(I))
+ I->replaceAllUsesWith(NewV);
+
+ ConvertedInsts[I] = NewV;
+ return NewV;
+}
+
+// Perform dead code elimination on the instructions we just modified.
+void Float2Int::cleanup() {
+ for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
+ I != E; ++I)
+ I->first->eraseFromParent();
+}
+
+bool Float2Int::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
+ // Clear out all state.
+ ECs = EquivalenceClasses<Instruction*>();
+ SeenInsts.clear();
+ ConvertedInsts.clear();
+ Roots.clear();
+
+ Ctx = &F.getParent()->getContext();
+
+ findRoots(F, Roots);
+
+ walkBackwards(Roots);
+ walkForwards();
+
+ bool Modified = validateAndTransform();
+ if (Modified)
+ cleanup();
+ return Modified;
+}
+
+FunctionPass *llvm::createFloat2IntPass() {
+ return new Float2Int();
+}
+
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c73e60f..97d5b6d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, DL))
return Offset;
@@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, DL);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 51e8041..ab8e5b8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Check for expressions that ScalarEvolution generates to compute
-/// BackedgeTakenInfo. If these expressions have not been reduced, then
-/// expanding them may incur additional cost (albeit in the loop preheader).
-static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
- SmallPtrSetImpl<const SCEV*> &Processed,
- ScalarEvolution *SE) {
- if (!Processed.insert(S).second)
- return false;
-
- // If the backedge-taken count is a UDiv, it's very likely a UDiv that
- // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
- // precise expression, rather than a UDiv from the user's code. If we can't
- // find a UDiv in the code with some simple searching, assume the former and
- // forego rewriting the loop.
- if (isa<SCEVUDivExpr>(S)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return true;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != S) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != S)
- return true;
- }
- }
-
- // Recurse past add expressions, which commonly occur in the
- // BackedgeTakenCount. They may already exist in program code, and if not,
- // they are not too expensive rematerialize.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, BI, Processed, SE))
- return true;
- }
- return false;
- }
-
- // HowManyLessThans uses a Max expression whenever the loop is not guarded by
- // the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
- return true;
-
- // If we haven't recognized an expensive SCEV pattern, assume it's an
- // expression produced by program code.
- return false;
-}
-
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
@@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
+ SCEVExpander &Rewriter) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
@@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
return false;
// Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
return false;
- SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
+ if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
return false;
return true;
@@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
}
else {
// In any other case, convert both IVInit and IVCount to integers before
@@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
Value *CmpIndVar = IndVar;
@@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 1f33f72..c19cd19 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -41,9 +41,9 @@ struct PointerOffsetPair {
};
struct LoadPOPPair {
+ LoadPOPPair() = default;
LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
: Load(L), POP(P), InsertOrder(O) {}
- LoadPOPPair() {}
LoadInst *Load;
PointerOffsetPair POP;
/// \brief The new load needs to be created before the first load in IR order.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 8445d5f..099f227 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -28,7 +28,7 @@
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using // it after. In this
+// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
@@ -112,8 +112,6 @@ public:
/// a particular register.
SmallBitVector UsedByIndices;
- RegSortData() {}
-
void print(raw_ostream &OS) const;
void dump() const;
};
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 600cbde..3de3b3d 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -165,6 +165,7 @@ namespace {
UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
+ UP.AllowExpensiveTripCount = false;
TTI.getUnrollingPreferences(L, UP);
}
@@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
- &LPM, &AC))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
+ TripMultiple, LI, this, &LPM, &AC))
return false;
return true;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2b5a078..d651473 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemCpy(M);
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
- else if (CallSite CS = (Value*)I) {
+ else if (auto CS = CallSite(I)) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
new file mode 100644
index 0000000..fea7641
--- /dev/null
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -0,0 +1,252 @@
+//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+// void foo(int a, int b) {
+// bar(a + b);
+// bar((a + 2) + b);
+// }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+// int t = a + b;
+// bar(t);
+// bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
+// (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of (a +
+// b) + c, it checks whether a + c or b + c is already computed by a dominating
+// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
+// c) + a and removes the redundancy accordingly. To efficiently look up whether
+// an expression is computed before, we store each instruction seen and its SCEV
+// into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the function
+// in depth-first order. For example, given
+//
+// (a + c) + d
+// ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to run for multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+// ac = a + c
+// ab = a + b
+// abc = ab + c
+// ab2 = ab + b
+// ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
+// result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds for now. This should be extended and
+// generalized.
+//
+// 2) Besides arithmetic operations, similar reassociation can be applied to
+// GEPs. For example, if
+// X = &arr[a]
+// dominates
+// Y = &arr[a + b]
+// we may rewrite Y into X + b.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "nary-reassociate"
+
+namespace {
+class NaryReassociate : public FunctionPass {
+public:
+ static char ID;
+
+ NaryReassociate(): FunctionPass(ID) {
+ initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ // Runs only one iteration of the dominator-based algorithm. See the header
+ // comments for why we need multiple iterations.
+ bool doOneIteration(Function &F);
+ // Reassociates I to a better form.
+ Instruction *tryReassociateAdd(Instruction *I);
+ // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
+ Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
+ // Rewrites I to LHS + RHS if LHS is computed already.
+ Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ // A lookup table quickly telling which instructions compute the given SCEV.
+ // Note that there can be multiple instructions at different locations
+ // computing to the same SCEV, so we map a SCEV to an instruction list. For
+ // example,
+ //
+ // if (p1)
+ // foo(a + b);
+ // if (p2)
+ // bar(a + b);
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+};
+} // anonymous namespace
+
+char NaryReassociate::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+
+FunctionPass *llvm::createNaryReassociatePass() {
+ return new NaryReassociate();
+}
+
+bool NaryReassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ bool Changed = false, ChangedInThisIteration;
+ do {
+ ChangedInThisIteration = doOneIteration(F);
+ Changed |= ChangedInThisIteration;
+ } while (ChangedInThisIteration);
+ return Changed;
+}
+
+bool NaryReassociate::doOneIteration(Function &F) {
+ bool Changed = false;
+ SeenExprs.clear();
+ // Traverse the dominator tree in depth-first order. This order ensures that
+ // all dominating add expressions are already in SeenExprs when we process an
+ // instruction.
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ++I) {
+ if (I->getOpcode() == Instruction::Add) {
+ if (Instruction *NewI = tryReassociateAdd(I)) {
+ Changed = true;
+ SE->forgetValue(I);
+ I->replaceAllUsesWith(NewI);
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ I = NewI;
+ }
+ // We should add the rewritten instruction because tryReassociateAdd may
+ // have invalidated the original one.
+ SeenExprs[SE->getSCEV(I)].push_back(I);
+ }
+ }
+ }
+ return Changed;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ return NewI;
+ if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ return NewI;
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
+ Instruction *I) {
+ Value *A = nullptr, *B = nullptr;
+ // To be conservative, we reassociate I only when it is the only user of A+B.
+ if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
+ // I = (A + B) + RHS
+ // = (A + RHS) + B or (B + RHS) + A
+ const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ return NewI;
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ return NewI;
+ }
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
+ Value *RHS, Instruction *I) {
+ auto Pos = SeenExprs.find(LHSExpr);
+ // Bail out if LHSExpr is not previously seen.
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ auto &LHSCandidates = Pos->second;
+ // Look for the closest dominator LHS of I that computes LHSExpr, and replace
+ // I with LHS + RHS.
+ //
+ // Because we traverse the dominator tree in pre-order, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!LHSCandidates.empty()) {
+ Instruction *LHS = LHSCandidates.back();
+ if (DT->dominates(LHS, I)) {
+ Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ NewI->takeName(I);
+ return NewI;
+ }
+ LHSCandidates.pop_back();
+ }
+ return nullptr;
+}
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 944725a..536f2a6 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
BasicBlock *Current = Pred;
while (true) {
for (Instruction &I : *Current) {
- if (CallSite CS = &I)
+ if (auto CS = CallSite(&I))
// Note: Technically, needing a safepoint isn't quite the right
// condition here. We should instead be checking if the target method
// has an
@@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// We need to stop going forward as soon as we see a call that can
// grow the stack (i.e. the call target has a non-zero frame
// size).
- if (CallSite CS = cursor) {
- (void)CS; // Silence an unused variable warning by gcc 4.8.2
+ if (CallSite(cursor)) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) {
// llvm.assume(...) are not really calls.
if (II->getIntrinsicID() == Intrinsic::assume) {
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index f5d21ff..ba48e0a 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
-static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size",
- cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
+ cl::init(false));
// Print out the base pointers for debugging
-static cl::opt<bool> PrintBasePointers("spp-print-base-pointers",
- cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
+ cl::init(false));
+
+#ifdef XDEBUG
+static bool ClobberNonLive = true;
+#else
+static bool ClobberNonLive = false;
+#endif
+static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
+ cl::location(ClobberNonLive),
+ cl::Hidden);
namespace {
struct RewriteStatepointsForGC : public FunctionPass {
@@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
"Make relocations explicit at statepoints", false, false)
namespace {
+struct GCPtrLivenessData {
+ /// Values defined in this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+ /// Values used in this block (and thus live); does not include values
+ /// killed within this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+
+ /// Values live into this basic block (i.e. used by any
+ /// instruction in this basic block or ones reachable from here)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+
+ /// Values live out of this basic block (i.e. live into
+ /// any successor block)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+};
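+
+// The liveness computation is assumed to follow the standard backward dataflow
+// formulation: LiveOut(BB) is the union of LiveIn over BB's successors, and
+// LiveIn(BB) = LiveSet(BB) union (LiveOut(BB) - KillSet(BB)), iterated to a
+// fixed point.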
+
// The type of the internal cache used inside the findBasePointers family
// of functions. From the callers perspective, this is an opaque type and
// should not be inspected.
@@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord {
/// Mapping from live pointers to a base-defining-value
DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- /// Any new values which were added to the IR during base pointer analysis
- /// for this safepoint
- DenseSet<llvm::Value *> NewInsertedDefs;
-
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
Instruction *StatepointToken;
@@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord {
};
}
+/// Compute the live-in set for every basic block in the function
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data);
+
+/// Given results from the dataflow liveness computation, find the set of live
+/// Values at a particular instruction.
+static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &out);
+
// TODO: Once we can get to the GCStrategy, this becomes
// Optional<bool> isGCManagedPointer(const Value *V) const override {
@@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) {
return false;
}
-/// Return true if the Value is a gc reference type which is potentially used
-/// after the instruction 'loc'. This is only used with the edge reachability
-/// liveness code. Note: It is assumed the V dominates loc.
-static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT,
- LoopInfo *LI) {
- if (!isGCPointerType(V.getType()))
- return false;
-
- if (V.use_empty())
- return false;
-
- // Given assumption that V dominates loc, this may be live
- return true;
+// Return true if this type is one which a) is a gc pointer or contains a GC
+// pointer and b) is of a type this code expects to encounter as a live value.
+// (The insertion code will assert that a type which matches (a) and not (b)
+// is not encountered.)
+static bool isHandledGCPointerType(Type *T) {
+ // We fully support gc pointers
+ if (isGCPointerType(T))
+ return true;
+ // We partially support vectors of gc pointers. The code will assert if it
+ // can't handle something.
+ if (auto VT = dyn_cast<VectorType>(T))
+ if (isGCPointerType(VT->getElementType()))
+ return true;
+ return false;
}
#ifndef NDEBUG
-static bool isAggWhichContainsGCPtrType(Type *Ty) {
+/// Returns true if this type contains a gc pointer whether we know how to
+/// handle that type or not.
+static bool containsGCPtrType(Type *Ty) {
+ if (isGCPointerType(Ty))
+ return true;
if (VectorType *VT = dyn_cast<VectorType>(Ty))
return isGCPointerType(VT->getScalarType());
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
- return isGCPointerType(AT->getElementType()) ||
- isAggWhichContainsGCPtrType(AT->getElementType());
+ return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
- [](Type *SubType) {
- return isGCPointerType(SubType) ||
- isAggWhichContainsGCPtrType(SubType);
- });
+ return std::any_of(
+ ST->subtypes().begin(), ST->subtypes().end(),
+ [](Type *SubType) { return containsGCPtrType(SubType); });
return false;
}
-#endif
-
-// Conservatively identifies any definitions which might be live at the
-// given instruction. The analysis is performed immediately before the
-// given instruction. Values defined by that instruction are not considered
-// live. Values used by that instruction are considered live.
-//
-// preconditions: valid IR graph, term is either a terminator instruction or
-// a call instruction, pred is the basic block of term, DT, LI are valid
-//
-// side effects: none, does not mutate IR
-//
-// postconditions: populates liveValues as discussed above
-static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred,
- DominatorTree &DT, LoopInfo *LI,
- StatepointLiveSetTy &liveValues) {
- liveValues.clear();
-
- assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator());
- Function *F = pred->getParent();
-
- auto is_live_gc_reference =
- [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); };
-
- // Are there any gc pointer arguments live over this point? This needs to be
- // special cased since arguments aren't defined in basic blocks.
- for (Argument &arg : F->args()) {
- assert(!isAggWhichContainsGCPtrType(arg.getType()) &&
- "support for FCA unimplemented");
-
- if (is_live_gc_reference(arg)) {
- liveValues.insert(&arg);
- }
- }
-
- // Walk through all dominating blocks - the ones which can contain
- // definitions used in this block - and check to see if any of the values
- // they define are used in locations potentially reachable from the
- // interesting instruction.
- BasicBlock *BBI = pred;
- while (true) {
- if (TraceLSP) {
- errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n";
- }
- assert(DT.dominates(BBI, pred));
- assert(isPotentiallyReachable(BBI, pred, &DT) &&
- "dominated block must be reachable");
-
- // Walk through the instructions in dominating blocks and keep any
- // that have a use potentially reachable from the block we're
- // considering putting the safepoint in
- for (Instruction &inst : *BBI) {
- if (TraceLSP) {
- errs() << "[LSP] Looking at instruction ";
- inst.dump();
- }
-
- if (pred == BBI && (&inst) == term) {
- if (TraceLSP) {
- errs() << "[LSP] stopped because we encountered the safepoint "
- "instruction.\n";
- }
-
- // If we're in the block which defines the interesting instruction,
- // we don't want to include any values as live which are defined
- // _after_ the interesting line or as part of the line itself
- // i.e. "term" is the call instruction for a call safepoint, the
- // results of the call should not be considered live in that stackmap
- break;
- }
-
- assert(!isAggWhichContainsGCPtrType(inst.getType()) &&
- "support for FCA unimplemented");
-
- if (is_live_gc_reference(inst)) {
- if (TraceLSP) {
- errs() << "[LSP] found live value for this safepoint ";
- inst.dump();
- term->dump();
- }
- liveValues.insert(&inst);
- }
- }
- if (!DT.getNode(BBI)->getIDom()) {
- assert(BBI == &F->getEntryBlock() &&
- "failed to find a dominator for something other than "
- "the entry block");
- break;
- }
- BBI = DT.getNode(BBI)->getIDom()->getBlock();
- }
+// Returns true if this is a type which a) is a gc pointer or contains a GC
+// pointer and b) is of a type which the code doesn't expect (i.e. first class
+// aggregates). Used to trip assertions.
+static bool isUnhandledGCPointerType(Type *Ty) {
+ return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
+#endif
static bool order_by_name(llvm::Value *a, llvm::Value *b) {
if (a->hasName() && b->hasName()) {
@@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
}
}
-/// Find the initial live set. Note that due to base pointer
-/// insertion, the live set may be incomplete.
-static void
-analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
- PartiallyConstructedSafepointRecord &result) {
+// Conservatively identifies any definitions which might be live at the
+// given instruction. The analysis is performed immediately before the
+// given instruction. Values defined by that instruction are not considered
+// live. Values used by that instruction are considered live.
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
+ const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
Instruction *inst = CS.getInstruction();
- BasicBlock *BB = inst->getParent();
StatepointLiveSetTy liveset;
- findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset);
+ findLiveSetAtInst(inst, OriginalLivenessData, liveset);
if (PrintLiveSet) {
// Note: This output is used by several of the test cases
@@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
result.liveset = liveset;
}
-/// True iff this value is the null pointer constant (of any pointer type)
-static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) {
- return isa<Constant>(V) && isa<PointerType>(V->getType()) &&
- cast<Constant>(V)->isNullValue();
+/// If we can trivially determine that this vector contains only base pointers,
+/// return the base instruction.
+static Value *findBaseOfVector(Value *I) {
+ assert(I->getType()->isVectorTy() &&
+ cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // Each case parallels findBaseDefiningValue below, see that code for
+ // detailed motivation.
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+ return I;
+
+ // We shouldn't see the address of a global as a vector value?
+ assert(!isa<GlobalVariable>(I) &&
+ "unexpected global variable found in base of vector");
+
+ // inlining could possibly introduce phi node that contains
+ // undef if callee has multiple returns
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with partially optimized
+ // code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ assert(Con->isNullValue() && "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (isa<LoadInst>(I))
+ return I;
+
+ // Note: This code is currently rather incomplete. We are essentially only
+ // handling cases where the vector element is trivially a base pointer. We
+ // need to update the entire base pointer construction algorithm to know how
+ // to track vector elements and potentially scalarize, but the case which
+ // would motivate the work hasn't shown up in real workloads yet.
+ llvm_unreachable("no base found for vector element");
}
/// Helper function for findBasePointer - Will return a value which either a)
@@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) {
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
- // There are instructions which can never return gc pointer values. Sanity
- // check
- // that this is actually true.
- assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
- !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers");
- assert((!isa<Instruction>(I) || isa<InvokeInst>(I) ||
- !cast<Instruction>(I)->isTerminator()) &&
- "With the exception of invoke terminators don't define values");
- assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) &&
- "Can't be definitions to start with");
- assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) &&
- "Comparisons don't give ops");
- // There's a bunch of instructions which just don't make sense to apply to
- // a pointer. The only valid reason for this would be pointer bit
- // twiddling which we're just not going to support.
- assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) &&
- "Binary ops on pointer values are meaningless. Unless your "
- "bit-twiddling which we don't support");
-
- if (Argument *Arg = dyn_cast<Argument>(I)) {
+ // This case is a bit of a hack - it only handles extracts from vectors which
+ // trivially contain only base pointers. See note inside the function for
+ // how to improve this.
+ if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
+ Value *VectorOperand = EEI->getVectorOperand();
+ Value *VectorBase = findBaseOfVector(VectorOperand);
+ (void)VectorBase;
+ assert(VectorBase && "extract element not known to be a trivial base");
+ return EEI;
+ }
+
+ if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't an gc value
- assert(Arg->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return Arg;
- }
+ return I;
- if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) {
+ if (isa<GlobalVariable>(I))
// base case
- assert(global->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return global;
- }
+ return I;
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
- if (UndefValue *undef = dyn_cast<UndefValue>(I)) {
- assert(undef->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return undef; // utterly meaningless, but useful for dealing with
- // partially optimized code.
- }
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with
+ // partially optimized code.
+ return I;
// Due to inheritance, this must be _after_ the global variable and undef
// checks
- if (Constant *con = dyn_cast<Constant>(I)) {
+ if (Constant *Con = dyn_cast<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
// Note: Finding a constant base for something marked for relocation
@@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) {
// off a potentially null value and have proven it null. We also use
// null pointers in dead paths of relocation phis (which we might later
// want to find a base pointer for).
- assert(con->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- assert(con->isNullValue() && "null is the only case which makes sense");
- return con;
+ assert(isa<ConstantPointerNull>(Con) &&
+ "null is the only case which makes sense");
+ return Con;
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
- Value *def = CI->stripPointerCasts();
- assert(def->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
+ Value *Def = CI->stripPointerCasts();
// If we find a cast instruction here, it means we've found a cast which is
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
- assert(!isa<CastInst>(def) && "shouldn't find another cast here");
- return findBaseDefiningValue(def);
+ assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
+ return findBaseDefiningValue(Def);
}
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getType()->isPointerTy()) {
- Value *Op = LI->getOperand(0);
- (void)Op;
- // Has to be a pointer to an gc object, or possibly an array of such?
- assert(Op->getType()->isPointerTy());
- return LI; // The value loaded is an gc base itself
- }
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- Value *Op = GEP->getOperand(0);
- if (Op->getType()->isPointerTy()) {
- return findBaseDefiningValue(Op); // The base of this GEP is the base
- }
- }
+ if (isa<LoadInst>(I))
+ return I; // The value loaded is a gc base itself
- if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) {
- // An alloca represents a conceptual stack slot. It's the slot itself
- // that the GC needs to know about, not the value in the slot.
- assert(alloc->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return alloc;
- }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ // The base of this GEP is the base
+ return findBaseDefiningValue(GEP->getPointerOperand());
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_result_ptr:
default:
// fall through to general call handling
break;
@@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) {
case Intrinsic::experimental_gc_result_float:
case Intrinsic::experimental_gc_result_int:
llvm_unreachable("these don't produce pointers");
- case Intrinsic::experimental_gc_result_ptr:
- // This is just a special case of the CallInst check below to handle a
- // statepoint with deopt args which hasn't been rewritten for GC yet.
- // TODO: Assert that the statepoint isn't rewritten yet.
- return II;
case Intrinsic::experimental_gc_relocate: {
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
@@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) {
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (CallInst *call = dyn_cast<CallInst>(I)) {
- assert(call->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return call;
- }
- if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
- assert(invoke->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return invoke;
- }
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ return I;
// I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
- if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (isa<AtomicCmpXchgInst>(I))
  // A CAS is effectively an atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
- // like a load. We loaded a pointer from a address in memory, that value
- // had better be a valid base pointer.
- return cas->getPointerOperand();
- }
- if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) {
- assert(AtomicRMWInst::Xchg == atomic->getOperation() &&
- "All others are binary ops which don't apply to base pointers");
- // semantically, a load, store pair. Treat it the same as a standard load
- return atomic->getPointerOperand();
- }
+ // like a load.
+ return I;
+
+ assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
+ "binary ops which don't apply to pointers");
// The aggregate ops. Aggregates can either be in the heap or on the
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
- if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) {
- return ev;
- }
+ if (isa<ExtractValueInst>(I))
+ return I;
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
@@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) {
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially). It's the job of
// the caller to resolve these.
- if (SelectInst *select = dyn_cast<SelectInst>(I)) {
- return select;
- }
-
- return cast<PHINode>(I);
+ assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+ "missing instruction case in findBaseDefiningValue");
+ return I;
}
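
For orientation, the recursion the rewritten findBaseDefiningValue performs can be reduced to a much smaller sketch. The helper below is illustrative only (it is not part of this patch) and assumes only GEPs and pointer casts need to be stripped; everything else is treated as defining its own base, just as the full version does for arguments, globals, calls and loads, while phis and selects are left for the caller to disambiguate.

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Value.h"

    // Hypothetical, simplified walk: peel GEPs and pointer casts until we hit a
    // value that defines its own base.
    static llvm::Value *findBaseSketch(llvm::Value *V) {
      if (auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(V))
        return findBaseSketch(GEP->getPointerOperand());
      if (auto *CI = llvm::dyn_cast<llvm::CastInst>(V))
        return findBaseSketch(CI->stripPointerCasts());
      return V;
    }
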
/// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
- Value *&Cached = cache[I];
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
+ Value *&Cached = Cache[I];
if (!Cached) {
Cached = findBaseDefiningValue(I);
}
- assert(cache[I] != nullptr);
+ assert(Cache[I] != nullptr);
if (TraceLSP) {
- errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
+ dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
<< "\n";
}
return Cached;
@@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) {
- Value *def = findBaseDefiningValueCached(I, cache);
- auto Found = cache.find(def);
- if (Found != cache.end()) {
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
+ Value *Def = findBaseDefiningValueCached(I, Cache);
+ auto Found = Cache.find(Def);
+ if (Found != Cache.end()) {
// Either a base-of relation, or a self reference. Caller must check.
return Found->second;
}
// Only a BDV available
- return def;
+ return Def;
}
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
-static bool isKnownBaseResult(Value *v) {
- if (!isa<PHINode>(v) && !isa<SelectInst>(v)) {
+static bool isKnownBaseResult(Value *V) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
// no recursion possible
return true;
}
- if (cast<Instruction>(v)->getMetadata("is_base_value")) {
+ if (isa<Instruction>(V) &&
+ cast<Instruction>(V)->getMetadata("is_base_value")) {
// This is a previously inserted base phi or select. We know
// that this is a base value.
return true;
@@ -647,8 +580,7 @@ private:
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
- DenseSet<llvm::Value *> &NewInsertedDefs) {
+static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
Value *def = findBaseOrBDV(I, cache);
if (isKnownBaseResult(def)) {
@@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
done = true;
// Since we're adding elements to 'states' as we run, we can't keep
// iterators into the set.
- SmallVector<Value*, 16> Keys;
+ SmallVector<Value *, 16> Keys;
Keys.reserve(states.size());
for (auto Pair : states) {
Value *V = Pair.first;
@@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// We want to keep naming deterministic in the loop that follows, so
// sort the keys before iteration. This is useful in allowing us to
// write stable tests. Note that there is no invalidation issue here.
- SmallVector<Value*, 16> Keys;
+ SmallVector<Value *, 16> Keys;
Keys.reserve(states.size());
for (auto Pair : states) {
Value *V = Pair.first;
@@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
if (!state.isConflict())
continue;
-
+
if (isa<PHINode>(v)) {
int num_preds =
std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
assert(num_preds > 0 && "how did we reach here");
PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- NewInsertedDefs.insert(phi);
// Add metadata marking this as a base value
auto *const_1 = ConstantInt::get(
Type::getInt32Ty(
@@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
UndefValue *undef = UndefValue::get(sel->getType());
SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
undef, "base_select", sel);
- NewInsertedDefs.insert(basesel);
// Add metadata marking this as a base value
auto *const_1 = ConstantInt::get(
Type::getInt32Ty(
@@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
if (!state.isConflict())
continue;
-
+
if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
PHINode *phi = cast<PHINode>(v);
unsigned NumPHIValues = phi->getNumIncomingValues();
@@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(states.count(base));
base = states[base].getBase();
assert(base != nullptr && "unknown PhiState!");
- assert(NewInsertedDefs.count(base) &&
- "should have already added this in a prev. iteration!");
}
  // In essence this assert states: the only way two
@@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
if (base->getType() != basephi->getType()) {
base = new BitCastInst(base, basephi->getType(), "cast",
InBB->getTerminator());
- NewInsertedDefs.insert(base);
}
basephi->addIncoming(base, InBB);
}
@@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// The cast is needed since base traversal may strip away bitcasts
if (base->getType() != basesel->getType()) {
base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- NewInsertedDefs.insert(base);
}
basesel->setOperand(i, base);
}
@@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
-static void findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
- DominatorTree *DT, DefiningValueMapTy &DVCache,
- DenseSet<llvm::Value *> &NewInsertedDefs) {
+static void
+findBasePointers(const StatepointLiveSetTy &live,
+ DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DominatorTree *DT, DefiningValueMapTy &DVCache) {
// For the naming of values inserted to be deterministic - which makes for
// much cleaner and more stable tests - we need to assign an order to the
// live values. DenseSets do not provide a deterministic order across runs.
- SmallVector<Value*, 64> Temp;
+ SmallVector<Value *, 64> Temp;
Temp.insert(Temp.end(), live.begin(), live.end());
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *ptr : Temp) {
- Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs);
+ Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
@@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live,
// If you see this trip and like to live really dangerously, the code should
// be correct, just with idioms the verifier can't handle. You can try
  // disabling the verifier at your own substantial risk.
- assert(!isNullConstant(base) && "the relocation code needs adjustment to "
- "handle the relocation of a null pointer "
- "constant without causing false positives "
- "in the safepoint ir verifier.");
+ assert(!isa<ConstantPointerNull>(base) &&
+ "the relocation code needs adjustment to handle the relocation of "
+ "a null pointer constant without causing false positives in the "
+ "safepoint ir verifier.");
}
}
@@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
const CallSite &CS,
PartiallyConstructedSafepointRecord &result) {
DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- DenseSet<llvm::Value *> NewInsertedDefs;
- findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs);
+ findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
// Note: Need to print these in a stable order since this is checked in
// some tests.
errs() << "Base Pairs (w/o Relocation):\n";
- SmallVector<Value*, 64> Temp;
+ SmallVector<Value *, 64> Temp;
Temp.reserve(PointerToBase.size());
for (auto Pair : PointerToBase) {
Temp.push_back(Pair.first);
@@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *Ptr : Temp) {
Value *Base = PointerToBase[Ptr];
- errs() << " derived %" << Ptr->getName() << " base %"
- << Base->getName() << "\n";
+ errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
+ << "\n";
}
}
result.PointerToBase = PointerToBase;
- result.NewInsertedDefs = NewInsertedDefs;
}
-/// Check for liveness of items in the insert defs and add them to the live
-/// and base pointer sets
-static void fixupLiveness(DominatorTree &DT, const CallSite &CS,
- const DenseSet<Value *> &allInsertedDefs,
- PartiallyConstructedSafepointRecord &result) {
- Instruction *inst = CS.getInstruction();
-
- auto liveset = result.liveset;
- auto PointerToBase = result.PointerToBase;
+/// Given an updated version of the dataflow liveness results, update the
+/// liveset and base pointer maps for the call site CS.
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result);
- auto is_live_gc_reference =
- [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); };
-
- // For each new definition, check to see if a) the definition dominates the
- // instruction we're interested in, and b) one of the uses of that definition
- // is edge-reachable from the instruction we're interested in. This is the
- // same definition of liveness we used in the intial liveness analysis
- for (Value *newDef : allInsertedDefs) {
- if (liveset.count(newDef)) {
- // already live, no action needed
- continue;
- }
-
- // PERF: Use DT to check instruction domination might not be good for
- // compilation time, and we could change to optimal solution if this
- // turn to be a issue
- if (!DT.dominates(cast<Instruction>(newDef), inst)) {
- // can't possibly be live at inst
- continue;
- }
-
- if (is_live_gc_reference(*newDef)) {
- // Add the live new defs into liveset and PointerToBase
- liveset.insert(newDef);
- PointerToBase[newDef] = newDef;
- }
- }
-
- result.liveset = liveset;
- result.PointerToBase = PointerToBase;
-}
-
-static void fixupLiveReferences(
- Function &F, DominatorTree &DT, Pass *P,
- const DenseSet<llvm::Value *> &allInsertedDefs,
- ArrayRef<CallSite> toUpdate,
+static void recomputeLiveInValues(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ // TODO-PERF: reuse the original liveness, then simply run the dataflow
+ // again. The old values are still live and will help it stabilize quickly.
+ GCPtrLivenessData RevisedLivenessData;
+ computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
const CallSite &CS = toUpdate[i];
- fixupLiveness(DT, CS, allInsertedDefs, info);
+ recomputeLiveInValues(RevisedLivenessData, CS, info);
}
}
-// Normalize basic block to make it ready to be target of invoke statepoint.
-// It means spliting it to have single predecessor. Return newly created BB
-// ready to be successor of invoke statepoint.
-static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB,
- BasicBlock *InvokeParent,
- Pass *P) {
- BasicBlock *ret = BB;
-
+// When inserting gc.relocate calls, we need to ensure there are no uses
+// of the original value between the gc.statepoint and the gc.relocate call.
+// One case which can arise is a phi node at the start of one of the successor
+// blocks.
+// We also need to be able to insert the gc.relocates only on the path which
+// goes through the statepoint. We might need to split an edge to make this
+// possible.
+static BasicBlock *
+normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
+ DominatorTree *DT = nullptr;
+ if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTP->getDomTree();
+
+ BasicBlock *Ret = BB;
if (!BB->getUniquePredecessor()) {
- ret = SplitBlockPredecessors(BB, InvokeParent, "");
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
}
- // Another requirement for such basic blocks is to not have any phi nodes.
- // Since we just ensured that new BB will have single predecessor,
- // all phi nodes in it will have one value. Here it would be naturall place
- // to
- // remove them all. But we can not do this because we are risking to remove
- // one of the values stored in liveset of another statepoint. We will do it
- // later after placing all safepoints.
+ // Now that Ret has a unique predecessor we can safely remove all phi nodes
+ // from it.
+ FoldSingleEntryPHINodes(Ret);
+ assert(!isa<PHINode>(Ret->begin()));
- return ret;
+ // At this point, we can safely insert a gc.relocate as the first instruction
+ // in Ret if needed.
+ return Ret;
}
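
The invariant this normalization establishes is worth spelling out, since several later hunks assert it. A hedged sketch of the check (hypothetical helper, not in the patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // After normalization, each successor of an invoke statepoint has exactly one
    // predecessor and no phi nodes, so a gc.relocate placed at its first insertion
    // point is reached only through that statepoint.
    static bool isNormalizedForRelocates(llvm::BasicBlock *Succ) {
      return Succ->getUniquePredecessor() != nullptr &&
             !llvm::isa<llvm::PHINode>(Succ->begin());
    }
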
static int find_index(ArrayRef<Value *> livevec, Value *val) {
@@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,
  // combination. This results in some blow up of the function declarations in
// the IR, but removes the need for argument bitcasts which shrinks the IR
// greatly and makes it much more readable.
- SmallVector<Type *, 1> types; // one per 'any' type
+ SmallVector<Type *, 1> types; // one per 'any' type
types.push_back(liveVariables[i]->getType()); // result type
Value *gc_relocate_decl = Intrinsic::getDeclaration(
M, Intrinsic::experimental_gc_relocate, types);
@@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
token = invoke;
// Generate gc relocates in exceptional path
- BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint(
- toReplace->getUnwindDest(), invoke->getParent(), P);
+ BasicBlock *unwindBlock = toReplace->getUnwindDest();
+ assert(!isa<PHINode>(unwindBlock->begin()) &&
+ unwindBlock->getUniquePredecessor() &&
+ "can't safely insert in this block!");
Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
@@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
exceptional_token, Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *normalDest = normalizeBBForInvokeSafepoint(
- toReplace->getNormalDest(), invoke->getParent(), P);
+ BasicBlock *normalDest = toReplace->getNormalDest();
+ assert(!isa<PHINode>(normalDest->begin()) &&
+ normalDest->getUniquePredecessor() &&
+ "can't safely insert in this block!");
IP = &*(normalDest->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
@@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
// Take the name of the original value call if it had one.
token->takeName(CS.getInstruction());
- // The GCResult is already inserted, we just need to find it
+// The GCResult is already inserted, we just need to find it
#ifndef NDEBUG
Instruction *toReplace = CS.getInstruction();
assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
@@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
// Second, create a gc.relocate for every live variable
CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
-
}
namespace {
@@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
-//
+//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
@@ -1458,14 +1355,13 @@ static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> live,
ArrayRef<struct PartiallyConstructedSafepointRecord> records) {
#ifndef NDEBUG
- int initialAllocaNum = 0;
-
- // record initial number of allocas
- for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
- itr++) {
- if (isa<AllocaInst>(*itr))
- initialAllocaNum++;
- }
+ // record initial number of (static) allocas; we'll check we have the same
+ // number when we get done.
+ int InitialAllocaNum = 0;
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum++;
#endif
// TODO-PERF: change data structures, reserve
@@ -1505,47 +1401,49 @@ static void relocationViaAlloca(
  // In case it was an invoke statepoint
// we will insert stores for exceptional path gc relocates.
if (isa<InvokeInst>(Statepoint)) {
- insertRelocationStores(info.UnwindToken->users(),
- allocaMap, visitedLiveValues);
+ insertRelocationStores(info.UnwindToken->users(), allocaMap,
+ visitedLiveValues);
}
-#ifndef NDEBUG
- // As a debuging aid, pretend that an unrelocated pointer becomes null at
- // the gc.statepoint. This will turn some subtle GC problems into slightly
- // easier to debug SEGVs
- SmallVector<AllocaInst *, 64> ToClobber;
- for (auto Pair : allocaMap) {
- Value *Def = Pair.first;
- AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
-
- // This value was relocated
- if (visitedLiveValues.count(Def)) {
- continue;
+ if (ClobberNonLive) {
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
+ // the gc.statepoint. This will turn some subtle GC problems into
+ // slightly easier to debug SEGVs. Note that on large IR files with
+ // lots of gc.statepoints this is extremely costly in both memory and
+ // time.
+ SmallVector<AllocaInst *, 64> ToClobber;
+ for (auto Pair : allocaMap) {
+ Value *Def = Pair.first;
+ AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+
+ // This value was relocated
+ if (visitedLiveValues.count(Def)) {
+ continue;
+ }
+ ToClobber.push_back(Alloca);
}
- ToClobber.push_back(Alloca);
- }
- auto InsertClobbersAt = [&](Instruction *IP) {
- for (auto *AI : ToClobber) {
- auto AIType = cast<PointerType>(AI->getType());
- auto PT = cast<PointerType>(AIType->getElementType());
- Constant *CPN = ConstantPointerNull::get(PT);
- StoreInst *store = new StoreInst(CPN, AI);
- store->insertBefore(IP);
- }
- };
+ auto InsertClobbersAt = [&](Instruction *IP) {
+ for (auto *AI : ToClobber) {
+ auto AIType = cast<PointerType>(AI->getType());
+ auto PT = cast<PointerType>(AIType->getElementType());
+ Constant *CPN = ConstantPointerNull::get(PT);
+ StoreInst *store = new StoreInst(CPN, AI);
+ store->insertBefore(IP);
+ }
+ };
- // Insert the clobbering stores. These may get intermixed with the
- // gc.results and gc.relocates, but that's fine.
- if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
- } else {
- BasicBlock::iterator Next(cast<CallInst>(Statepoint));
- Next++;
- InsertClobbersAt(Next);
+ // Insert the clobbering stores. These may get intermixed with the
+ // gc.results and gc.relocates, but that's fine.
+ if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
+ InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ } else {
+ BasicBlock::iterator Next(cast<CallInst>(Statepoint));
+ Next++;
+ InsertClobbersAt(Next);
+ }
}
-#endif
}
  // Update uses with loads from the allocas and add stores for the gc_relocated values
for (auto Pair : allocaMap) {
@@ -1603,11 +1501,11 @@ static void relocationViaAlloca(
assert(!inst->isTerminator() &&
"The only TerminatorInst that can produce a value is "
"InvokeInst which is handled above.");
- store->insertAfter(inst);
+ store->insertAfter(inst);
}
} else {
assert((isa<Argument>(def) || isa<GlobalVariable>(def) ||
- (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) &&
+ isa<ConstantPointerNull>(def)) &&
"Must be argument or global");
store->insertAfter(cast<Instruction>(alloca));
}
@@ -1621,12 +1519,11 @@ static void relocationViaAlloca(
}
#ifndef NDEBUG
- for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
- itr++) {
- if (isa<AllocaInst>(*itr))
- initialAllocaNum--;
- }
- assert(initialAllocaNum == 0 && "We must not introduce any extra allocas");
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum--;
+ assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
@@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
}
}
-static Function *getUseHolder(Module &M) {
- FunctionType *ftype =
- FunctionType::get(Type::getVoidTy(M.getContext()), true);
- Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype));
- return Func;
-}
-
/// Insert holders so that each Value is obviously live through the entire
-/// liftetime of the call.
+/// lifetime of the call.
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
- SmallVectorImpl<CallInst *> &holders) {
+ SmallVectorImpl<CallInst *> &Holders) {
+ if (Values.empty())
+ // No values to hold live, might as well not insert the empty holder
+ return;
+
Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
- Function *Func = getUseHolder(*M);
+ // Use a dummy vararg function to actually hold the values live
+ Function *Func = cast<Function>(M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
if (CS.isCall()) {
// For call safepoints insert dummy calls right after safepoint
- BasicBlock::iterator next(CS.getInstruction());
- next++;
- CallInst *base_holder = CallInst::Create(Func, Values, "", next);
- holders.push_back(base_holder);
- } else if (CS.isInvoke()) {
- // For invoke safepooints insert dummy calls both in normal and
- // exceptional destination blocks
- InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
- CallInst *normal_holder = CallInst::Create(
- Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt());
- CallInst *unwind_holder = CallInst::Create(
- Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt());
- holders.push_back(normal_holder);
- holders.push_back(unwind_holder);
- } else
- llvm_unreachable("unsupported call type");
+ BasicBlock::iterator Next(CS.getInstruction());
+ Next++;
+ Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ return;
+ }
+ // For invoke safepoints insert dummy calls in both the normal and
+ // exceptional destination blocks.
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
}
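
As a usage sketch of the holder trick above (the '__tmp_use' name matches the patch; the helper itself is an illustrative assumption):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"

    // Declare (or reuse) a void(...) vararg function and call it with the values
    // that must stay obviously live; the dummy call is erased once liveness has
    // been recomputed, exactly like the holders inserted above.
    static llvm::CallInst *holdLive(llvm::Module &M,
                                    llvm::ArrayRef<llvm::Value *> Values,
                                    llvm::Instruction *InsertBefore) {
      auto *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(M.getContext()),
                                          /*isVarArg=*/true);
      auto *Holder =
          llvm::cast<llvm::Function>(M.getOrInsertFunction("__tmp_use", FTy));
      return llvm::CallInst::Create(Holder, Values, "", InsertBefore);
    }
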
static void findLiveReferences(
Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ GCPtrLivenessData OriginalLivenessData;
+ computeLiveInValues(DT, F, OriginalLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
const CallSite &CS = toUpdate[i];
- analyzeParsePointLiveness(DT, CS, info);
+ analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
}
}
-static void addBasesAsLiveValues(StatepointLiveSetTy &liveset,
- DenseMap<Value *, Value *> &PointerToBase) {
- // Identify any base pointers which are used in this safepoint, but not
- // themselves relocated. We need to relocate them so that later inserted
- // safepoints can get the properly relocated base register.
- DenseSet<Value *> missing;
- for (Value *L : liveset) {
- assert(PointerToBase.find(L) != PointerToBase.end());
- Value *base = PointerToBase[L];
- assert(base);
- if (liveset.find(base) == liveset.end()) {
- assert(PointerToBase.find(base) == PointerToBase.end());
- // uniqued by set insert
- missing.insert(base);
+/// Remove any vector of pointers from the liveset by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the liveset. It
+/// would be preferable to include the vector in the statepoint itself, but
+/// the lowering code currently does not handle that. Extending it would be
+/// slightly non-trivial since it requires a format change. Given how rare
+/// such cases are (for the moment?), scalarizing is an acceptable compromise.
+static void splitVectorValues(Instruction *StatepointInst,
+ StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+ SmallVector<Value *, 16> ToSplit;
+ for (Value *V : LiveSet)
+ if (isa<VectorType>(V->getType()))
+ ToSplit.push_back(V);
+
+ if (ToSplit.empty())
+ return;
+
+ Function &F = *(StatepointInst->getParent()->getParent());
+
+ DenseMap<Value *, AllocaInst *> AllocaMap;
+ // First is normal return, second is exceptional return (invoke only)
+ DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
+ for (Value *V : ToSplit) {
+ LiveSet.erase(V);
+
+ AllocaInst *Alloca =
+ new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[V] = Alloca;
+
+ VectorType *VT = cast<VectorType>(V->getType());
+ IRBuilder<> Builder(StatepointInst);
+ SmallVector<Value *, 16> Elements;
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
+ LiveSet.insert(Elements.begin(), Elements.end());
+
+ auto InsertVectorReform = [&](Instruction *IP) {
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+ Value *ResultVec = UndefValue::get(VT);
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
+ Builder.getInt32(i));
+ return ResultVec;
+ };
+
+ if (isa<CallInst>(StatepointInst)) {
+ BasicBlock::iterator Next(StatepointInst);
+ Next++;
+ Instruction *IP = &*(Next);
+ Replacements[V].first = InsertVectorReform(IP);
+ Replacements[V].second = nullptr;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
+ // We've already normalized - check that we don't have shared destination
+ // blocks
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()));
+ BasicBlock *UnwindDest = Invoke->getUnwindDest();
+ assert(!isa<PHINode>(UnwindDest->begin()));
+ // Insert insertelement sequences in both successors
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Replacements[V].first = InsertVectorReform(IP);
+ IP = &*(UnwindDest->getFirstInsertionPt());
+ Replacements[V].second = InsertVectorReform(IP);
}
}
+ for (Value *V : ToSplit) {
+ AllocaInst *Alloca = AllocaMap[V];
+
+ // Capture all users before we start mutating use lists
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : V->users())
+ Users.push_back(cast<Instruction>(U));
+
+ for (Instruction *I : Users) {
+ if (auto Phi = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
+ if (V == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", I);
+ I->replaceUsesOfWith(V, Load);
+ }
+ }
- // Note that we want these at the end of the list, otherwise
- // register placement gets screwed up once we lower to STATEPOINT
- // instructions. This is an utter hack, but there doesn't seem to be a
- // better one.
- for (Value *base : missing) {
- assert(base);
- liveset.insert(base);
- PointerToBase[base] = base;
- }
- assert(liveset.size() == PointerToBase.size());
+ // Store the original value and the replacement value into the alloca
+ StoreInst *Store = new StoreInst(V, Alloca);
+ if (auto I = dyn_cast<Instruction>(V))
+ Store->insertAfter(I);
+ else
+ Store->insertAfter(Alloca);
+
+ // Normal return for invoke, or call return
+ Instruction *Replacement = cast<Instruction>(Replacements[V].first);
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ // Unwind return for invoke only
+ Replacement = cast_or_null<Instruction>(Replacements[V].second);
+ if (Replacement)
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ }
+
+ // Apply mem2reg to promote the allocas back to SSA form
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Value *V : ToSplit)
+ Allocas.push_back(AllocaMap[V]);
+ PromoteMemToReg(Allocas, DT);
}
static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
@@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
#endif
+ // When inserting gc.relocates for invokes, we need to be able to insert at
+ // the top of the successor blocks. See the comment on
+ // normalizeForInvokeSafepoint on exactly what is needed. Note that this step
+ // may restructure the CFG.
+ for (CallSite CS : toUpdate) {
+ if (!CS.isInvoke())
+ continue;
+ InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
+ P);
+ normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
+ P);
+ }
+
// A list of dummy calls added to the IR to keep various values obviously
// live in the IR. We'll remove all of these when done.
SmallVector<CallInst *, 64> holders;
@@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
SmallVector<Value *, 64> DeoptValues;
for (Use &U : StatepointCS.vm_state_args()) {
Value *Arg = cast<Value>(&U);
- if (isGCPointerType(Arg->getType()))
+ assert(!isUnhandledGCPointerType(Arg->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
insertUseHolderAfter(CS, DeoptValues, holders);
@@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// site.
findLiveReferences(F, DT, P, toUpdate, records);
+ // Do a limited scalarization of any live at safepoint vector values which
+ // Do a limited scalarization of any live-at-safepoint vector values which
+ // the cost of some possible performance loss. TODO: it would be nice to
+ // natively support vectors all the way through the backend so we don't need
+ // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ Instruction *statepoint = toUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
+ }
+
// B) Find the base pointers for each live pointer
/* scope for caching */ {
// Cache the 'defining value' relation used in the computation and
@@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// gep a + 1
// safepoint 2
// br loop
- DenseSet<llvm::Value *> allInsertedDefs;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- allInsertedDefs.insert(info.NewInsertedDefs.begin(),
- info.NewInsertedDefs.end());
- }
-
// We insert some dummy calls after each safepoint to definitely hold live
// the base pointers which were identified for that safepoint. We'll then
// ask liveness for _every_ base inserted to see what is now live. Then we
@@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
insertUseHolderAfter(CS, Bases, holders);
}
- // Add the bases explicitly to the live vector set. This may result in a few
- // extra relocations, but the base has to be available whenever a pointer
- // derived from it is used. Thus, we need it to be part of the statepoint's
- // gc arguments list. TODO: Introduce an explicit notion (in the following
- // code) of the GC argument list as seperate from the live Values at a
- // given statepoint.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- addBasesAsLiveValues(info.liveset, info.PointerToBase);
- }
+ // By selecting base pointers, we've effectively inserted new uses. Thus, we
+ // need to rerun liveness. We may *also* have inserted new defs, but that's
+ // not the key issue.
+ recomputeLiveInValues(F, DT, P, toUpdate, records);
- // If we inserted any new values, we need to adjust our notion of what is
- // live at a particular safepoint.
- if (!allInsertedDefs.empty()) {
- fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records);
- }
if (PrintBasePointers) {
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
@@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
toUpdate.clear(); // prevent accident use of invalid CallSites
- // In case if we inserted relocates in a different basic block than the
- // original safepoint (this can happen for invokes). We need to be sure that
- // original values were not used in any of the phi nodes at the
- // beginning of basic block containing them. Because we know that all such
- // blocks will have single predecessor we can safely assume that all phi
- // nodes have single entry (because of normalizeBBForInvokeSafepoint).
- // Just remove them all here.
- for (size_t i = 0; i < records.size(); i++) {
- Instruction *I = records[i].StatepointToken;
-
- if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
- FoldSingleEntryPHINodes(invoke->getNormalDest());
- assert(!isa<PHINode>(invoke->getNormalDest()->begin()));
-
- FoldSingleEntryPHINodes(invoke->getUnwindDest());
- assert(!isa<PHINode>(invoke->getUnwindDest()->begin()));
- }
- }
-
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> live;
for (size_t i = 0; i < records.size(); i++) {
@@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
Statepoint statepoint(info.StatepointToken);
live.insert(live.end(), statepoint.gc_args_begin(),
statepoint.gc_args_end());
+#ifndef NDEBUG
+ // Do some basic sanity checks on our liveness results before performing
+ // relocation. Relocation can and will turn mistakes in liveness results
+ // into nonsensical code which is much harder to debug.
+ // TODO: It would be nice to test consistency as well
+ assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ "statepoint must be reachable or liveness is meaningless");
+ for (Value *V : statepoint.gc_args()) {
+ if (!isa<Instruction>(V))
+ // Non-instruction values trivially dominate all possible uses
+ continue;
+ auto LiveInst = cast<Instruction>(V);
+ assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+ "unreachable values should never be live");
+ assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+#endif
}
unique_unsorted(live);
@@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
if (!shouldRewriteStatepointsIn(F))
return false;
- // Gather all the statepoints which need rewritten.
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Gather all the statepoints which need to be rewritten. Be careful to only
+ // consider those in reachable code since we need to ask dominance queries
+ // when rewriting. We'll delete the unreachable ones in a moment.
SmallVector<CallSite, 64> ParsePointNeeded;
+ bool HasUnreachableStatepoint = false;
for (Instruction &I : inst_range(F)) {
// TODO: only the ones with the flag set!
- if (isStatepoint(I))
- ParsePointNeeded.push_back(CallSite(&I));
+ if (isStatepoint(I)) {
+ if (DT.isReachableFromEntry(I.getParent()))
+ ParsePointNeeded.push_back(CallSite(&I));
+ else
+ HasUnreachableStatepoint = true;
+ }
}
+ bool MadeChange = false;
+
+ // Delete any unreachable statepoints so that we don't have unrewritten
+ // statepoints surviving this pass. This makes testing easier and the
+ // resulting IR less confusing to human readers. Rather than be fancy, we
+ // just reuse a utility function which removes the unreachable blocks.
+ if (HasUnreachableStatepoint)
+ MadeChange |= removeUnreachableBlocks(F);
+
// Return early if no work to do.
if (ParsePointNeeded.empty())
- return false;
+ return MadeChange;
+
+ // As a prepass, go ahead and aggressively destroy single entry phi nodes.
+ // These are created by LCSSA. They have the effect of increasing the size
+ // of liveness sets for no good reason. It may be harder to do this post
+ // insertion since relocations and base phis can confuse things.
+ for (BasicBlock &BB : F)
+ if (BB.getUniquePredecessor()) {
+ MadeChange = true;
+ FoldSingleEntryPHINodes(&BB);
+ }
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return insertParsePoints(F, DT, this, ParsePointNeeded);
+ MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ return MadeChange;
+}
+
+// liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness, the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Is this value a constant consisting of entirely null values?
+static bool isConstantNull(Value *V) {
+ return isa<Constant>(V) && cast<Constant>(V)->isNullValue();
+}
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+ BasicBlock::reverse_iterator rend,
+ DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+ Instruction *I = &*ritr;
+
+ // KILL/Def - Remove this definition from LiveIn
+ LiveTmp.erase(I);
+
+ // Don't consider *uses* in PHI nodes; we handle their contribution to
+ // predecessor blocks when we seed the LiveOut sets
+ if (isa<PHINode>(I))
+ continue;
+
+ // USE - Add to the LiveIn set for this instruction
+ for (Value *V : I->operands()) {
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+ !isa<UndefValue>(V)) {
+ // The choice to exclude null and undef is arbitrary here. Reconsider?
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock *Succ : successors(BB)) {
+ const BasicBlock::iterator E(Succ->getFirstNonPHI());
+ for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+ PHINode *Phi = cast<PHINode>(&*I);
+ Value *V = Phi->getIncomingValueForBlock(BB);
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+ !isa<UndefValue>(V)) {
+ // The choice to exclude null and undef is arbitrary here. Reconsider?
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+ DenseSet<Value *> KillSet;
+ for (Instruction &I : *BB)
+ if (isHandledGCPointerType(I.getType()))
+ KillSet.insert(&I);
+ return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+ TerminatorInst *TI, bool TermOkay = false) {
+ for (Value *V : Live) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // The terminator can be a member of the LiveOut set. LLVM's definition
+ // of instruction dominance states that V does not dominate itself. As
+ // such, we need to special case this to allow it.
+ if (TermOkay && TI == I)
+ continue;
+ assert(DT.dominates(I, TI) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+ }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+ BasicBlock &BB) {
+ checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+ checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+ checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data) {
+
+ SmallSetVector<BasicBlock *, 200> Worklist;
+ auto AddPredsToWorklist = [&](BasicBlock *BB) {
+ // We use a SetVector so that we don't have duplicates in the worklist.
+ Worklist.insert(pred_begin(BB), pred_end(BB));
+ };
+ auto NextItem = [&]() {
+ BasicBlock *BB = Worklist.back();
+ Worklist.pop_back();
+ return BB;
+ };
+
+ // Seed the liveness for each individual block
+ for (BasicBlock &BB : F) {
+ Data.KillSet[&BB] = computeKillSet(&BB);
+ Data.LiveSet[&BB].clear();
+ computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+ for (Value *Kill : Data.KillSet[&BB])
+ assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+ Data.LiveOut[&BB] = DenseSet<Value *>();
+ computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+ Data.LiveIn[&BB] = Data.LiveSet[&BB];
+ set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+ set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+ if (!Data.LiveIn[&BB].empty())
+ AddPredsToWorklist(&BB);
+ }
+
+ // Propagate that liveness until stable
+ while (!Worklist.empty()) {
+ BasicBlock *BB = NextItem();
+
+ // Compute our new liveout set, then exit early if it hasn't changed
+ // despite the contribution of our successors.
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ const auto OldLiveOutSize = LiveOut.size();
+ for (BasicBlock *Succ : successors(BB)) {
+ assert(Data.LiveIn.count(Succ));
+ set_union(LiveOut, Data.LiveIn[Succ]);
+ }
+ // assert: OldLiveOut is a subset of LiveOut
+ if (OldLiveOutSize == LiveOut.size()) {
+ // If the sets are the same size, then we didn't actually add anything
+ // when unioning our successors' LiveIn. Thus, the LiveIn of this block
+ // hasn't changed.
+ continue;
+ }
+ Data.LiveOut[BB] = LiveOut;
+
+ // Apply the effects of this basic block
+ DenseSet<Value *> LiveTmp = LiveOut;
+ set_union(LiveTmp, Data.LiveSet[BB]);
+ set_subtract(LiveTmp, Data.KillSet[BB]);
+
+ assert(Data.LiveIn.count(BB));
+ const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+ // assert: OldLiveIn is a subset of LiveTmp
+ if (OldLiveIn.size() != LiveTmp.size()) {
+ Data.LiveIn[BB] = LiveTmp;
+ AddPredsToWorklist(BB);
+ }
+ } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+ // Sanity check our output against SSA properties. This helps catch any
+ // missing kills during the above iteration.
+ for (BasicBlock &BB : F) {
+ checkBasicSSA(DT, Data, BB);
+ }
+#endif
+}
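
For reference, the fixed point computed above is the standard backward liveness dataflow. In the names used by the code, the per-block equations iterated to convergence are (sketch only):

    LiveOut(BB) = union of LiveIn(Succ) over all successors Succ of BB
    LiveIn(BB)  = (LiveOut(BB) union LiveSet(BB)) minus KillSet(BB)

LiveSet and KillSet are the per-block gen/kill sets seeded before the worklist loop, and a block's predecessors are re-queued whenever its LiveIn grows.
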
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &Out) {
+
+ BasicBlock *BB = Inst->getParent();
+
+ // Note: The copy is intentional and required
+ assert(Data.LiveOut.count(BB));
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+ // We want to handle the statepoint itself oddly. Its
+ // call result is not live (normal), nor are its arguments
+ // (unless they're used again later). This adjustment is
+ // specifically what we need to relocate.
+ BasicBlock::reverse_iterator rend(Inst);
+ computeLiveInValues(BB->rbegin(), rend, LiveOut);
+ LiveOut.erase(Inst);
+ Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Info) {
+ Instruction *Inst = CS.getInstruction();
+ StatepointLiveSetTy Updated;
+ findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+ DenseSet<Value *> Bases;
+ for (auto KVPair : Info.PointerToBase) {
+ Bases.insert(KVPair.second);
+ }
+#endif
+ // We may have base pointers which are now live that weren't before. We need
+ // to update the PointerToBase structure to reflect this.
+ for (auto V : Updated)
+ if (!Info.PointerToBase.count(V)) {
+ assert(Bases.count(V) && "can't find base for unexpected live value");
+ Info.PointerToBase[V] = V;
+ continue;
+ }
+
+#ifndef NDEBUG
+ for (auto V : Updated) {
+ assert(Info.PointerToBase.count(V) &&
+ "must be able to find base for live value");
+ }
+#endif
+
+ // Remove any stale base mappings - this can happen since our liveness is
+ // more precise than the one inherent in the base pointer analysis.
+ DenseSet<Value *> ToErase;
+ for (auto KVPair : Info.PointerToBase)
+ if (!Updated.count(KVPair.first))
+ ToErase.insert(KVPair.first);
+ for (auto V : ToErase)
+ Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+ for (auto KVPair : Info.PointerToBase)
+ assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+ Info.liveset = Updated;
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 875a007..bc068f7 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+ markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+ Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 06b000f..59dc528 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1166,10 +1166,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()),
+ DVI->getDebugLoc(), Inst);
}
}
};
@@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+ NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
- : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
+ IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
@@ -3250,7 +3251,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *GEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3283,7 +3285,7 @@ private:
// Extract the single value and store it using the indices.
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
@@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Create a piece expression describing the new partition or reuse AI's
// expression if there is only one partition.
DIExpression PieceExpr = Expr;
- if (IsSplit || Expr.isBitPiece()) {
+ if (IsSplit || Expr->isBitPiece()) {
// If this alloca is already a scalar replacement of a larger aggregate,
// Piece.Offset describes the offset inside the scalar.
- uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0;
+ uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
uint64_t Start = Offset + Piece.Offset;
uint64_t Size = Piece.Size;
- if (Expr.isBitPiece()) {
- uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize();
+ if (Expr->isBitPiece()) {
+ uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
@@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
OldDDI->eraseFromParent();
- auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI);
- NewDDI->setDebugLoc(DbgDecl->getDebugLoc());
+ DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+ &AI);
}
}
return Changed;
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 3e7cf04..f99fe3f 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
/// \returns The profiled weight of I.
unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
- if (DLoc.isUnknown())
+ if (!DLoc)
return 0;
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
return 0;
- DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ DILocation DIL = DLoc.get();
int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Discriminator = DIL->getDiscriminator();
unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
<< " (line offset: " << LOffset << "." << Discriminator
@@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- DISubprogram S = getDISubprogram(&F);
- if (S.isSubprogram())
- return S.getLineNumber();
+ if (MDSubprogram *S = getDISubprogram(&F))
+ return S->getLine();
  // If we could not find the start of \p F, emit a diagnostic to inform the user
// about the missed opportunity.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 6cc8411..42095ae 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeMergedLoadStoreMotionPass(Registry);
+ initializeNaryReassociatePass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
@@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
+ initializeFloat2IntPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index acd8585..693c5ae 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1117,10 +1117,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
- Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()),
+ DVI->getDebugLoc(), Inst);
}
}
};
@@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// split the alloca again later.
unsigned AS = AI->getType()->getAddressSpace();
Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
- V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+ V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
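The GEP builder calls here, and in the Scalarizer, SeparateConstOffsetFromGEP and StraightLineStrengthReduce hunks below, move to the overloads that take the element type explicitly, with i8 for byte-wise bumps. A minimal sketch of that call shape; the helper name is invented:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Illustration only: bit-cast to i8* in the pointer's address space, then
    // advance by a byte offset with an explicitly typed GEP.
    static Value *bumpByBytes(IRBuilder<> &Builder, Value *Ptr, uint64_t Bytes) {
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      Value *Raw = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy(AS));
      return Builder.CreateGEP(Builder.getInt8Ty(), Raw, Builder.getInt64(Bytes));
    }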
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index a457cba..d55dc6a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) {
CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 1a04d74..8af4753 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
}
}
// Create an ugly GEP with a single index for each index.
- ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
}
}
// Create a GEP with the constant offset index.
if (AccumulativeByteOffset != 0) {
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
- ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
}
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index e71031c..2fc9368 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -15,42 +15,46 @@
//
// There are many optimizations we can perform in the domain of SLSR. This file
// for now contains only an initial step. Specifically, we look for strength
-// reduction candidates in two forms:
+// reduction candidates in the following forms:
//
-// Form 1: (B + i) * S
-// Form 2: &B[i * S]
+// Form 1: B + i * S
+// Form 2: (B + i) * S
+// Form 3: &B[i * S]
//
// where S is an integer variable, and i is a constant integer. If we found two
-// candidates
+// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
+// in a simpler way with respect to S1. For example,
//
-// S1: X = (B + i) * S
-// S2: Y = (B + i') * S
+// S1: X = B + i * S
+// S2: Y = B + i' * S => X + (i' - i) * S
//
-// or
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S => X + (i' - i) * S
//
// S1: X = &B[i * S]
-// S2: Y = &B[i' * S]
-//
-// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with
+// S2: Y = &B[i' * S] => &X[(i' - i) * S]
//
-// Y = X + (i' - i) * S
+// Note: (i' - i) * S is folded to the extent possible.
//
-// or
+// This rewriting is in general a good idea. The code patterns we focus on
+// usually come from loop unrolling, so (i' - i) * S is likely the same
+// across iterations and can be reused. When that happens, the optimized form
+// takes only one add starting from the second iteration.
//
-// Y = &X[(i' - i) * S]
-//
-// where (i' - i) * S is folded to the extent possible. When S2 has multiple
-// bases, we pick the one that is closest to S2, or S2's "immediate" basis.
+// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
+// multiple bases, we choose to rewrite S2 with respect to its "immediate"
+// basis, the basis that is the closest ancestor in the dominator tree.
//
// TODO:
//
-// - Handle candidates in the form of B + i * S
-//
// - Floating point arithmetic when fast math is enabled.
//
// - SLSR may decrease ILP at the architecture level. Targets that are very
// sensitive to ILP may want to disable it. Having SLSR consider ILP is
// left as future work.
+//
+// - When (i' - i) is constant but i and i' are not, we could still perform
+// SLSR.
#include <vector>
#include "llvm/ADT/DenseSet.h"
@@ -72,13 +76,12 @@ namespace {
class StraightLineStrengthReduce : public FunctionPass {
public:
- // SLSR candidate. Such a candidate must be in the form of
- // (Base + Index) * Stride
- // or
- // Base[..][Index * Stride][..]
+ // SLSR candidate. Such a candidate must be in one of the forms described in
+ // the header comments.
struct Candidate : public ilist_node<Candidate> {
enum Kind {
Invalid, // reserved for the default constructor
+ Add, // B + i * S
Mul, // (B + i) * S
GEP, // &B[..][i * S][..]
};
@@ -92,14 +95,14 @@ public:
Basis(nullptr) {}
Kind CandidateKind;
const SCEV *Base;
- // Note that Index and Stride of a GEP candidate may not have the same
- // integer type. In that case, during rewriting, Stride will be
+ // Note that Index and Stride of a GEP candidate do not necessarily have the
+ // same integer type. In that case, during rewriting, Stride will be
// sign-extended or truncated to Index's type.
ConstantInt *Index;
Value *Stride;
// The instruction this candidate corresponds to. It helps us to rewrite a
// candidate with respect to its immediate basis. Note that one instruction
- // can corresponds to multiple candidates depending on how you associate the
+ // can correspond to multiple candidates depending on how you associate the
// expression. For instance,
//
// (a + 1) * (b + 2)
@@ -143,31 +146,43 @@ private:
// Returns true if Basis is a basis for C, i.e., Basis dominates C and they
// share the same base and stride.
bool isBasisFor(const Candidate &Basis, const Candidate &C);
+ // Returns whether the candidate can be folded into an addressing mode.
+ bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
+ const DataLayout *DL);
+ // Returns true if C is already in a simplest form and not worth being
+ // rewritten.
+ bool isSimplestForm(const Candidate &C);
// Checks whether I is in a candidate form. If so, adds all the matching forms
// to Candidates, and tries to find the immediate basis for each of them.
- void allocateCandidateAndFindBasis(Instruction *I);
+ void allocateCandidatesAndFindBasis(Instruction *I);
+ // Allocate candidates and find bases for Add instructions.
+ void allocateCandidatesAndFindBasisForAdd(Instruction *I);
+ // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
+ // candidate.
+ void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
+ Instruction *I);
// Allocate candidates and find bases for Mul instructions.
- void allocateCandidateAndFindBasisForMul(Instruction *I);
+ void allocateCandidatesAndFindBasisForMul(Instruction *I);
// Splits LHS into Base + Index and, if it succeeds, calls
- // allocateCandidateAndFindBasis.
- void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS,
- Instruction *I);
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
+ Instruction *I);
// Allocate candidates and find bases for GetElementPtr instructions.
- void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP);
+ void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
// A helper function that scales Idx with ElementSize before invoking
- // allocateCandidateAndFindBasis.
- void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
- Value *S, uint64_t ElementSize,
- Instruction *I);
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
// Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
// basis.
- void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B,
- ConstantInt *Idx, Value *S,
- Instruction *I);
+ void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
+ ConstantInt *Idx, Value *S,
+ Instruction *I);
// Rewrites candidate C with respect to Basis.
void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
// A helper function that factors ArrayIdx to a product of a stride and a
- // constant index, and invokes allocateCandidateAndFindBasis with the
+ // constant index, and invokes allocateCandidatesAndFindBasis with the
// factorings.
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
GetElementPtrInst *GEP);
@@ -187,7 +202,7 @@ private:
// Temporarily holds all instructions that are unlinked (but not deleted) by
// rewriteCandidateWithBasis. These instructions will be actually removed
// after all rewriting finishes.
- DenseSet<Instruction *> UnlinkedInstructions;
+ std::vector<Instruction *> UnlinkedInstructions;
};
} // anonymous namespace
@@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
-static bool isCompletelyFoldable(GetElementPtrInst *GEP,
- const TargetTransformInfo *TTI,
- const DataLayout *DL) {
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
GlobalVariable *BaseGV = nullptr;
int64_t BaseOffset = 0;
bool HasBaseReg = false;
@@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP,
BaseOffset, HasBaseReg, Scale);
}
-// TODO: We currently implement an algorithm whose time complexity is linear to
-// the number of existing candidates. However, a better algorithm exists. We
-// could depth-first search the dominator tree, and maintain a hash table that
-// contains all candidates that dominate the node being traversed. This hash
-// table is indexed by the base and the stride of a candidate. Therefore,
-// finding the immediate basis of a candidate boils down to one hash-table look
-// up.
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(
- Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
- Instruction *I) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If &B[Idx * S] fits into an addressing mode, do not turn it into
- // non-free computation.
- if (isCompletelyFoldable(GEP, TTI, DL))
- return;
+// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
+static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
+ TargetTransformInfo *TTI) {
+ return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
+ Index->getSExtValue());
+}
+
+bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
+ TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ if (C.CandidateKind == Candidate::Add)
+ return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
+ if (C.CandidateKind == Candidate::GEP)
+ return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
+ return false;
+}
+
+// Returns true if GEP has zero or one non-zero index.
+static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
+ unsigned NumNonZeroIndices = 0;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
+ ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
+ if (ConstIdx == nullptr || !ConstIdx->isZero())
+ ++NumNonZeroIndices;
+ }
+ return NumNonZeroIndices <= 1;
+}
+
+bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
+ if (C.CandidateKind == Candidate::Add) {
+ // B + 1 * S or B + (-1) * S
+ return C.Index->isOne() || C.Index->isMinusOne();
+ }
+ if (C.CandidateKind == Candidate::Mul) {
+ // (B + 0) * S
+ return C.Index->isZero();
+ }
+ if (C.CandidateKind == Candidate::GEP) {
+ // (char*)B + S or (char*)B - S
+ return ((C.Index->isOne() || C.Index->isMinusOne()) &&
+ hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
}
+ return false;
+}
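A hypothetical illustration of the shapes isSimplestForm accepts; such candidates are still recorded so they can serve as bases, but are never rewritten themselves. Names are invented:

    int simplest_forms(int b, int s, char *p) {
      int x = b + s;        // Add with i == 1: already simplest
      int y = b * s;        // recorded as the Mul candidate (b + 0) * s
      return x + y + p[s];  // &p[s]: i == 1 and only one non-zero index
    }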
+// TODO: We currently implement an algorithm whose time complexity is linear in
+// the number of existing candidates. However, we could do better by using
+// ScopedHashTable. Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table look up.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I) {
Candidate C(CT, B, Idx, S, I);
- // Try to compute the immediate basis of C.
- unsigned NumIterations = 0;
- // Limit the scan radius to avoid running forever.
- static const unsigned MaxNumIterations = 50;
- for (auto Basis = Candidates.rbegin();
- Basis != Candidates.rend() && NumIterations < MaxNumIterations;
- ++Basis, ++NumIterations) {
- if (isBasisFor(*Basis, C)) {
- C.Basis = &(*Basis);
- break;
+ // SLSR can complicate an instruction in two cases:
+ //
+ // 1. If we can fold I into an addressing mode, computing I is likely free or
+ // takes only one instruction.
+ //
+ // 2. I is already in a simplest form. For example, when
+ // X = B + 8 * S
+ // Y = B + S,
+ // rewriting Y to X - 7 * S is probably a bad idea.
+ //
+ // In the above cases, we still add I to the candidate list so that I can be
+ // the basis of other candidates, but we leave I's basis blank so that I
+ // won't be rewritten.
+ if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+ // Try to compute the immediate basis of C.
+ unsigned NumIterations = 0;
+ // Limit the scan radius to avoid running in quadratic time.
+ static const unsigned MaxNumIterations = 50;
+ for (auto Basis = Candidates.rbegin();
+ Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+ ++Basis, ++NumIterations) {
+ if (isBasisFor(*Basis, C)) {
+ C.Basis = &(*Basis);
+ break;
+ }
}
}
// Regardless of whether we find a basis for C, we need to push C to the
- // candidate list.
+ // candidate list so that it can be the basis of other candidates.
Candidates.push_back(C);
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Instruction *I) {
switch (I->getOpcode()) {
+ case Instruction::Add:
+ allocateCandidatesAndFindBasisForAdd(I);
+ break;
case Instruction::Mul:
- allocateCandidateAndFindBasisForMul(I);
+ allocateCandidatesAndFindBasisForMul(I);
break;
case Instruction::GetElementPtr:
- allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+ allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
break;
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Instruction *I) {
+ // Try matching B + i * S.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I an add?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
+ if (LHS != RHS)
+ allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *S = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + Idx * S
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
+ APInt One(Idx->getBitWidth(), 1);
+ Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else {
+ // At least, I = LHS + 1 * RHS
+ ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
+ I);
+ }
+}
+
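A worked illustration of the shl case matched above: a shift-by-constant addend is recorded with the equivalent power-of-two coefficient. Names are invented:

    int shifted_add(int b, int s) {
      // Matched as an Add candidate with B = b, S = s and i = 1 << 3 = 8,
      // exactly as if the source had been b + 8 * s.
      return b + (s << 3);
    }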
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
Value *LHS, Value *RHS, Instruction *I) {
Value *B = nullptr;
ConstantInt *Idx = nullptr;
@@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {
// If LHS is in the form of "Base + Index", then I is in the form of
// "(Base + Index) * RHS".
- allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
} else {
// Otherwise, at least try the form (LHS + 0) * RHS.
ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
- allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
I);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
Instruction *I) {
// Try matching (B + i) * S.
// TODO: we could extend SLSR to float and vector types.
if (!isa<IntegerType>(I->getType()))
return;
+ assert(I->getNumOperands() == 2 && "isn't I a mul?");
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- allocateCandidateAndFindBasisForMul(LHS, RHS, I);
+ allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
if (LHS != RHS) {
// Symmetrically, try to split RHS to Base + Index.
- allocateCandidateAndFindBasisForMul(RHS, LHS, I);
+ allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
Instruction *I) {
- // I = B + sext(Idx *nsw S) *nsw ElementSize
+ // I = B + sext(Idx *nsw S) * ElementSize
+ // = B + (sext(Idx) * sext(S)) * ElementSize
// = B + (sext(Idx) * ElementSize) * sext(S)
// Casting to IntegerType is safe because we skipped vector GEPs.
IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
ConstantInt *ScaledIdx = ConstantInt::get(
IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
- allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+ allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
}
void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
const SCEV *Base,
uint64_t ElementSize,
GetElementPtrInst *GEP) {
- // At least, ArrayIdx = ArrayIdx *s 1.
- allocateCandidateAndFindBasisForGEP(
+ // At least, ArrayIdx = ArrayIdx *nsw 1.
+ allocateCandidatesAndFindBasisForGEP(
Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
ArrayIdx, ElementSize, GEP);
Value *LHS = nullptr;
ConstantInt *RHS = nullptr;
- // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4).
- //
// One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
// itself. This would allow us to handle the shl case for free. However,
// matching SCEVs has two issues:
@@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
// sext'ed multiplication.
if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
// SLSR is currently unsafe if i * S may overflow.
- // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
- allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+ allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+ // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+ APInt One(RHS->getBitWidth(), 1);
+ ConstantInt *PowerOf2 =
+ ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+ allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
GetElementPtrInst *GEP) {
// TODO: handle vector GEPs
if (GEP->getType()->isVectorTy())
@@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
else
BumpWithUglyGEP = true;
}
+
// Compute Bump = C - Basis = (i' - i) * S.
// Common case 1: if (i' - i) is 1, Bump = S.
if (IndexOffset.getSExtValue() == 1)
@@ -443,9 +556,24 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
// Common case 2: if (i' - i) is -1, Bump = -S.
if (IndexOffset.getSExtValue() == -1)
return Builder.CreateNeg(C.Stride);
- // Otherwise, Bump = (i' - i) * sext/trunc(S).
- ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset);
- Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType());
+
+ // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+ // have different bit widths.
+ IntegerType *DeltaType =
+ IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+ Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+ if (IndexOffset.isPowerOf2()) {
+ // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+ ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+ return Builder.CreateShl(ExtendedStride, Exponent);
+ }
+ if ((-IndexOffset).isPowerOf2()) {
+ // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i - i').
+ ConstantInt *Exponent =
+ ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
+ return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
+ }
+ Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
return Builder.CreateMul(ExtendedStride, Delta);
}
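A worked example of the new power-of-two fast path in emitBump, with invented values: for a basis X = B + 3 * S and a candidate Y = B + 7 * S, the index offset 7 - 3 = 4 is a power of two, so the bump is emitted as a shift rather than a multiply:

    // Illustration only: what the rewritten code computes for Y.
    int rewriteY(int X, int S) {
      int Bump = S << 2;  // log2(4) == 2
      return X + Bump;    // equals B + 7 * S
    }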
@@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
const Candidate &C, const Candidate &Basis) {
assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
C.Stride == Basis.Stride);
+ // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
+ // basis of a candidate cannot be unlinked before the candidate.
+ assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
// An instruction can correspond to multiple candidates. Therefore, instead of
// simply deleting an instruction when we rewrite it, we mark its parent as
@@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
switch (C.CandidateKind) {
+ case Candidate::Add:
case Candidate::Mul:
- Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ if (BinaryOperator::isNeg(Bump)) {
+ Reduced =
+ Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ } else {
+ Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ }
break;
case Candidate::GEP:
{
Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
if (BumpWithUglyGEP) {
// C = (char *)Basis + Bump
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- // We only considered inbounds GEP as candidates.
- Reduced = Builder.CreateInBoundsGEP(Reduced, Bump);
+ if (InBounds)
+ Reduced =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
+ else
+ Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
} else {
// C = gep Basis, Bump
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ else
+ Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
}
}
break;
@@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
// Unlink C.Ins so that we can skip other candidates also corresponding to
// C.Ins. The actual deletion is postponed to the end of runOnFunction.
C.Ins->removeFromParent();
- UnlinkedInstructions.insert(C.Ins);
+ UnlinkedInstructions.push_back(C.Ins);
}
bool StraightLineStrengthReduce::runOnFunction(Function &F) {
@@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
for (auto &I : *node->getBlock())
- allocateCandidateAndFindBasis(&I);
+ allocateCandidatesAndFindBasis(&I);
}
// Rewrite candidates in the reverse depth-first order. This order makes sure
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 6c3ce58..4f23e20 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really necessary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (const auto &BB : ParentRegion->blocks())
+ for (auto *BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {