summaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2008-09-15 21:22:06 +0000
committerDan Gohman <gohman@apple.com>2008-09-15 21:22:06 +0000
commitad7321f58a485ae80fe3da1f400aa695e250b1a8 (patch)
tree57eabe141c7fd8e2d63d5b09e6edd6f75b0b32f1 /lib/Transforms/Scalar
parent89e9ed379569528f75a29f2367fccc06a39fe201 (diff)
downloadexternal_llvm-ad7321f58a485ae80fe3da1f400aa695e250b1a8.zip
external_llvm-ad7321f58a485ae80fe3da1f400aa695e250b1a8.tar.gz
external_llvm-ad7321f58a485ae80fe3da1f400aa695e250b1a8.tar.bz2
Teach LSR to optimize away SMAX operations for tripcounts in common
cases. See the comment above OptimizeSMax for the full story, and the testcase for an example. This cancels out a pessimization commonly attributed to indvars, and will allow us to lift some of the artificial throttles in indvars, rather than add new ones. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56230 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp127
1 files changed, 127 insertions, 0 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 145aa92..07438e3 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -184,6 +184,11 @@ private:
/// inside the loop then try to eliminate the cast opeation.
void OptimizeShadowIV(Loop *L);
+ /// OptimizeSMax - Rewrite the loop's terminating condition
+ /// if it uses an smax computation.
+ ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
@@ -1695,6 +1700,123 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
+/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
+/// an smax computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// where the comparison is signed, the trip count isn't just 'n', because
+/// 'n' could be negative. And unfortunately this can come up even for loops
+/// where the user didn't use a C do-while loop. For example, seemingly
+/// well-behaved top-test loops will commonly be lowered like this:
+//
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// signed-max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// smax = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != smax);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analyis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting this type of loop and
+/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ SCEVHandle IterationCount = SE->getIterationCount(L);
+ if (isa<SCEVCouldNotCompute>(IterationCount))
+ return Cond;
+ SCEVHandle One = SE->getIntegerSCEV(1, IterationCount->getType());
+
+ // Adjust for an annoying getIterationCount quirk.
+ IterationCount = SE->getAddExpr(IterationCount, One);
+
+ // Check for a max calculation that matches the pattern.
+ SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
+ if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+
+ SCEVHandle SMaxLHS = SMax->getOperand(0);
+ SCEVHandle SMaxRHS = SMax->getOperand(1);
+ if (!SMaxLHS || SMaxLHS != One) return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+ SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(*SE) != One)
+ return Cond;
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ NewRHS = Sel->getOperand(2);
+ if (!NewRHS) return Cond;
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
+ CmpInst::ICMP_SLT :
+ CmpInst::ICMP_SGE,
+ Cond->getOperand(0), NewRHS, "scmp", Cond);
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ Cond->eraseFromParent();
+ SE->deleteValueFromRecords(Cond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Sel->eraseFromParent();
+ SE->deleteValueFromRecords(Sel);
+ if (Cmp->use_empty()) {
+ Cmp->eraseFromParent();
+ SE->deleteValueFromRecords(Cmp);
+ }
+ CondUse->User = NewCond;
+ return NewCond;
+}
+
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
@@ -1836,6 +1958,11 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
+ // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT instead of NE.
+ Cond = OptimizeSMax(L, Cond, CondUse);
+
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);