summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Gohman <gohman@apple.com> 2010-05-07 23:36:59 +0000
committer: Dan Gohman <gohman@apple.com> 2010-05-07 23:36:59 +0000
commit: 59dc60337fb9c02c4fbec3b44d7275a32bafa775 (patch)
tree: 4e6a5481eef75b59a2d12c67a5f52fd6c25bc158
parent: c665a5146d37cbc793f2e4413fd39a40ec230e12 (diff)
download: external_llvm-59dc60337fb9c02c4fbec3b44d7275a32bafa775.zip
          external_llvm-59dc60337fb9c02c4fbec3b44d7275a32bafa775.tar.gz
          external_llvm-59dc60337fb9c02c4fbec3b44d7275a32bafa775.tar.bz2
When pruning candidate formulae out of an LSRUse, update the LSRUse's Regs set after all pruning is done, rather than trying to do it on the fly, which can produce an incomplete result. This fixes a case where heuristic pruning was stripping all formulae from a use, which led the solver to enter an infinite loop. Also, add a few asserts to diagnose this kind of situation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103328 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp 14
-rw-r--r-- test/CodeGen/X86/lsr-delayed-fold.ll 83
2 files changed, 94 insertions, 3 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index cf3d16f..61966f5 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2592,9 +2592,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
LSRUse &LU = Uses[LUIdx];
FormulaSorter Sorter(L, LU, SE, DT);
- // Clear out the set of used regs; it will be recomputed.
- LU.Regs.clear();
-
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
@@ -2632,9 +2629,18 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
--NumForms;
continue;
}
+ }
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ LU.Regs.clear();
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
+
+ // Reset this to prepare for the next use.
BestFormulae.clear();
}
@@ -2718,6 +2724,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
LU.Formulae.pop_back();
--e;
--i;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
continue;
}
@@ -2810,6 +2817,7 @@ retry:
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
+ assert(!ReqRegs.empty() && "Solver failed even without required registers");
ReqRegs.clear();
goto retry;
}
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
index 17d6a4c..8afbb0d 100644
--- a/test/CodeGen/X86/lsr-delayed-fold.ll
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -49,3 +49,86 @@ lbl_264: ; preds = %if.end, %lbl_264.pr
%tobool12 = icmp eq i8 %mul.i18, 0 ; <i1> [#uses=1]
unreachable
}
+
+; LSR ends up going into conservative pruning mode; don't prune the solution
+; so far that it becomes unsolvable though.
+; PR7077
+
+%struct.Bu = type { i32, i32, i32 }
+
+define void @_Z3fooP2Bui(%struct.Bu* nocapture %bu) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc131, %entry
+ %indvar = phi i64 [ %indvar.next, %for.inc131 ], [ 0, %entry ] ; <i64> [#uses=3]
+ br i1 undef, label %for.inc131, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %for.body
+ %tmp15 = add i64 %indvar, 1 ; <i64> [#uses=1]
+ %tmp17 = add i64 %indvar, 2 ; <i64> [#uses=1]
+ %tmp19 = add i64 %indvar, 3 ; <i64> [#uses=1]
+ %tmp21 = add i64 %indvar, 4 ; <i64> [#uses=1]
+ %tmp23 = add i64 %indvar, 5 ; <i64> [#uses=1]
+ %tmp25 = add i64 %indvar, 6 ; <i64> [#uses=1]
+ %tmp27 = add i64 %indvar, 7 ; <i64> [#uses=1]
+ %tmp29 = add i64 %indvar, 8 ; <i64> [#uses=1]
+ %tmp31 = add i64 %indvar, 9 ; <i64> [#uses=1]
+ %tmp35 = add i64 %indvar, 11 ; <i64> [#uses=1]
+ %tmp37 = add i64 %indvar, 12 ; <i64> [#uses=1]
+ %tmp39 = add i64 %indvar, 13 ; <i64> [#uses=1]
+ %tmp41 = add i64 %indvar, 14 ; <i64> [#uses=1]
+ %tmp43 = add i64 %indvar, 15 ; <i64> [#uses=1]
+ %tmp45 = add i64 %indvar, 16 ; <i64> [#uses=1]
+ %tmp47 = add i64 %indvar, 17 ; <i64> [#uses=1]
+ %mul = trunc i64 %indvar to i32 ; <i32> [#uses=1]
+ %add22 = trunc i64 %tmp15 to i32 ; <i32> [#uses=1]
+ %add28 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1]
+ %add34 = trunc i64 %tmp19 to i32 ; <i32> [#uses=1]
+ %add40 = trunc i64 %tmp21 to i32 ; <i32> [#uses=1]
+ %add46 = trunc i64 %tmp23 to i32 ; <i32> [#uses=1]
+ %add52 = trunc i64 %tmp25 to i32 ; <i32> [#uses=1]
+ %add58 = trunc i64 %tmp27 to i32 ; <i32> [#uses=1]
+ %add64 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1]
+ %add70 = trunc i64 %tmp31 to i32 ; <i32> [#uses=1]
+ %add82 = trunc i64 %tmp35 to i32 ; <i32> [#uses=1]
+ %add88 = trunc i64 %tmp37 to i32 ; <i32> [#uses=1]
+ %add94 = trunc i64 %tmp39 to i32 ; <i32> [#uses=1]
+ %add100 = trunc i64 %tmp41 to i32 ; <i32> [#uses=1]
+ %add106 = trunc i64 %tmp43 to i32 ; <i32> [#uses=1]
+ %add112 = trunc i64 %tmp45 to i32 ; <i32> [#uses=1]
+ %add118 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1]
+ %tmp10 = getelementptr %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1]
+ %tmp11 = load i32* %tmp10 ; <i32> [#uses=0]
+ tail call void undef(i32 %add22)
+ tail call void undef(i32 %add28)
+ tail call void undef(i32 %add34)
+ tail call void undef(i32 %add40)
+ tail call void undef(i32 %add46)
+ tail call void undef(i32 %add52)
+ tail call void undef(i32 %add58)
+ tail call void undef(i32 %add64)
+ tail call void undef(i32 %add70)
+ tail call void undef(i32 %add82)
+ tail call void undef(i32 %add88)
+ tail call void undef(i32 %add94)
+ tail call void undef(i32 %add100)
+ tail call void undef(i32 %add106)
+ tail call void undef(i32 %add112)
+ tail call void undef(i32 %add118)
+ br label %for.body123
+
+for.body123: ; preds = %for.body123, %lor.lhs.false
+ %j.03 = phi i32 [ 0, %lor.lhs.false ], [ %inc, %for.body123 ] ; <i32> [#uses=2]
+ %add129 = add i32 %mul, %j.03 ; <i32> [#uses=1]
+ tail call void undef(i32 %add129)
+ %inc = add nsw i32 %j.03, 1 ; <i32> [#uses=1]
+ br i1 undef, label %for.inc131, label %for.body123
+
+for.inc131: ; preds = %for.body123, %for.body
+ %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
+ br i1 undef, label %for.end134, label %for.body
+
+for.end134: ; preds = %for.inc131
+ ret void
+}