diff options
author | Andrew Trick <atrick@apple.com> | 2012-07-18 04:35:10 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2012-07-18 04:35:10 +0000 |
commit | 4781d8ee1cd586bf7a569f80e1e49694c93eddd8 (patch) | |
tree | ddd01788c164c6fd412f657aacebdcfb26478e31 | |
parent | 75dc33a60b65bbbf2253b0b916df1d36a4da4237 (diff) | |
download | external_llvm-4781d8ee1cd586bf7a569f80e1e49694c93eddd8.zip external_llvm-4781d8ee1cd586bf7a569f80e1e49694c93eddd8.tar.gz external_llvm-4781d8ee1cd586bf7a569f80e1e49694c93eddd8.tar.bz2 |
indvars: Linear function test replace should avoid reusing undef.
Fixes PR13371: indvars pass incorrectly substitutes 'undef' values.
I do not like this fix. It's needed until/unless the meaning of undef
changes. It attempts to be complete according to the IR spec, but I
don't have much confidence in the implementation given the difficulty
testing undefined behavior. Worse, this invalidates some of my
hard-fought work on indvars and LSR to optimize pointer induction
variables. It results benchmark regressions, which I'll track
internally. On x86_64 no LTO I see:
-3% huffbench
-3% 400.perlbench
-8% fhourstones
My only suggestion for recovering is to change the meaning of
undef. If we could trust an arbitrary instruction to produce a some
real value that can be manipulated (e.g. incremented) according to
non-undef rules, then this case could be easily handled with SCEV.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160421 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 72 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll | 22 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/lftr-reuse.ll | 11 |
3 files changed, 96 insertions, 9 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index a9ba657..4b5c84c 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1215,21 +1215,26 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { return 0; } -/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show -/// that the current exit test is already sufficiently canonical. -static bool needsLFTR(Loop *L, DominatorTree *DT) { +/// Return the compare guarding the loop latch, or NULL for unrecognized tests. +static ICmpInst *getLoopTest(Loop *L) { assert(L->getExitingBlock() && "expected loop exit"); BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return false; + return 0; BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); + return dyn_cast<ICmpInst>(BI->getCondition()); +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { // Do LFTR to simplify the exit condition to an ICMP. - ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + ICmpInst *Cond = getLoopTest(L); if (!Cond) return true; @@ -1259,6 +1264,48 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) { return Phi != getLoopPhiForCounter(IncV, L, DT); } +/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils +/// down to checking that all operands are constant and listing instructions +/// that may hide undef. +static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited, + unsigned Depth) { + if (isa<Constant>(V)) + return !isa<UndefValue>(V); + + if (Depth >= 6) + return false; + + // Conservatively handle non-constant non-instructions. For example, Arguments + // may be undef. + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // Load and return values may be undef. + if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) + return false; + + // Optimistically handle other instructions. + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + if (!Visited.insert(*OI)) + continue; + if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + return false; + } + return true; +} + +/// Return true if the given value is concrete. We must prove that undef can +/// never reach it. +/// +/// TODO: If we decide that this is a good approach to checking for undef, we +/// may factor it into a common location. +static bool hasConcreteDef(Value *V) { + SmallPtrSet<Value*, 8> Visited; + Visited.insert(V); + return hasConcreteDefImpl(V, Visited, 0); +} + /// AlmostDeadIV - Return true if this IV has any uses other than the (soon to /// be rewritten) loop exit test. static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { @@ -1283,6 +1330,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. /// +/// Currently only valid for LFTR. See the comments on hasConcreteDef below. +/// /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount /// /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. @@ -1331,6 +1380,19 @@ FindLoopCounter(Loop *L, const SCEV *BECount, if (getLoopPhiForCounter(IncV, L, DT) != Phi) continue; + // Avoid reusing a potentially undef value to compute other values that may + // have originally had a concrete definition. + if (!hasConcreteDef(Phi)) { + // We explicitly allow unknown phis as long as they are already used by + // the loop test. In this case we assume that performing LFTR could not + // increase the number of undef users. + if (ICmpInst *Cond = getLoopTest(L)) { + if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) + && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) { + continue; + } + } + } const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { diff --git a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll new file mode 100644 index 0000000..7c5f818 --- /dev/null +++ b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +; PR13371: indvars pass incorrectly substitutes 'undef' values +; +; LFTR should not user %undef as the loop counter. +; CHECK: @test +; CHECK-NOT: icmp{{.*}}undef +@.str3 = private constant [6 x i8] c"%lld\0A\00", align 1 +declare i32 @printf(i8* noalias nocapture, ...) nounwind +define i64 @test() nounwind { +func_start: + br label %block9 +block9: ; preds = %block9,%func_start + %undef = phi i64 [ %next_undef, %block9 ], [ undef, %func_start ] + %iter = phi i64 [ %next_iter, %block9 ], [ 1, %func_start ] + %next_iter = add nsw i64 %iter, 1 + %0 = tail call i32 (i8*, ...)* @printf(i8* noalias nocapture getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i64 %next_iter, i64 %undef) + %next_undef = add nsw i64 %undef, 1 + %_tmp_3 = icmp slt i64 %next_iter, 100 + br i1 %_tmp_3, label %block9, label %exit +exit: ; preds = %block9 + ret i64 0 +} diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll index 9abfe13..7fb36e5 100644 --- a/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -153,6 +153,9 @@ return: ; Remove %i which is only used by the exit test. ; Verify that SCEV can still compute a backedge count from the sign ; extended %n, used for pointer comparison by LFTR. +; +; TODO: Fix for PR13371 currently makes this impossible. See +; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules. define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { entry: %x.ext = sext i32 %x to i64 @@ -162,13 +165,13 @@ entry: %lim = add i32 %x, %n %cmp.ph = icmp ult i32 %x, %lim br i1 %cmp.ph, label %loop, label %exit - +; CHECK: @geplftr ; CHECK: loop: ; CHECK: phi i8* -; CHECK-NOT: phi +; DISABLE-NOT: phi // This check is currently disabled ; CHECK: getelementptr ; CHECK: store -; CHECK: icmp ne i8* +; DISABLE: icmp ne i8* // This check is currently disabled ; CHECK: br i1 loop: %i = phi i32 [ %x, %entry ], [ %inc, %loop ] @@ -187,7 +190,7 @@ exit: define void @nevertaken() nounwind uwtable ssp { entry: br label %loop - +; CHECK: @nevertaken ; CHECK: loop: ; CHECK-NOT: phi ; CHECK-NOT: add |