 lib/Analysis/ScalarEvolution.cpp                                         | 142
 test/CodeGen/X86/masked-iv-safe.ll                                       | 244
 test/CodeGen/X86/masked-iv-unsafe.ll                                     | 386
 test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll |  10
 4 files changed, 769 insertions(+), 13 deletions(-)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index a9521bb..63ad297 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -701,17 +701,81 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
if (SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
- // FIXME: If the input value is a chrec scev, and we can prove that the value
+ // If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
- // operands (often constants). This would allow analysis of something like
+ // operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
+ if (!isa<SCEVCouldNotCompute>(BECount)) {
+ // Get the start and step of the addrec. These are used below to
+ // determine whether it is safe to extend the recurrence without
+ // changing its value.
+ SCEVHandle Start = AR->getStart();
+ SCEVHandle Step = AR->getStepRecurrence(*this);
+
+ // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ SCEVHandle CastedBECount =
+ getTruncateOrZeroExtend(BECount, Start->getType());
+ if (BECount ==
+ getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
+ const Type *WideTy =
+ IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ SCEVHandle ZMul =
+ getMulExpr(CastedBECount,
+ getTruncateOrZeroExtend(Step, Start->getType()));
+ // Check whether Start+Step*BECount has no unsigned overflow.
+ if (getZeroExtendExpr(ZMul, WideTy) ==
+ getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy))) {
+ SCEVHandle Add = getAddExpr(Start, ZMul);
+ if (getZeroExtendExpr(Add, WideTy) ==
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getZeroExtendExpr(ZMul, WideTy)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ AR->getLoop());
+ }
+
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ SCEVHandle SMul =
+ getMulExpr(CastedBECount,
+ getTruncateOrSignExtend(Step, Start->getType()));
+ // Check whether Start+Step*BECount has no unsigned overflow.
+ if (getSignExtendExpr(SMul, WideTy) ==
+ getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
+ getSignExtendExpr(Step, WideTy))) {
+ SCEVHandle Add = getAddExpr(Start, SMul);
+ if (getZeroExtendExpr(Add, WideTy) ==
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getSignExtendExpr(SMul, WideTy)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ AR->getLoop());
+ }
+ }
+ }
+ }
SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)];
if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty);
return Result;
}
-SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type *Ty) {
+SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
@@ -726,10 +790,54 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type *
if (SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
- // FIXME: If the input value is a chrec scev, and we can prove that the value
+ // If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
- // operands (often constants). This would allow analysis of something like
+ // operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
+ if (!isa<SCEVCouldNotCompute>(BECount)) {
+ // Get the start and step of the addrec. These are used below to
+ // determine whether it is safe to extend the recurrence without
+ // changing its value.
+ SCEVHandle Start = AR->getStart();
+ SCEVHandle Step = AR->getStepRecurrence(*this);
+
+ // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ SCEVHandle CastedBECount =
+ getTruncateOrZeroExtend(BECount, Start->getType());
+ if (BECount ==
+ getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
+ const Type *WideTy =
+ IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ SCEVHandle SMul =
+ getMulExpr(CastedBECount,
+ getTruncateOrSignExtend(Step, Start->getType()));
+ // Check whether Start+Step*BECount has no signed overflow.
+ if (getSignExtendExpr(SMul, WideTy) ==
+ getMulExpr(getSignExtendExpr(CastedBECount, WideTy),
+ getSignExtendExpr(Step, WideTy))) {
+ SCEVHandle Add = getAddExpr(Start, SMul);
+ if (getSignExtendExpr(Add, WideTy) ==
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getSignExtendExpr(SMul, WideTy)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ AR->getLoop());
+ }
+ }
+ }
+ }
SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)];
if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty);
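Both extend paths rely on the same proof technique: evaluate Step*BECount, and then the add with Start, in a type twice as wide, and compare against the individually extended operands; if they agree, the narrow computation cannot have wrapped. A hedged scalar illustration of that doubling-width test:

#include <cstdint>
// True if a*b wraps in 8 bits: compute the product in a type twice as
// wide and compare against the truncated result, mirroring the WideTy
// comparisons above.
bool mul_wraps_u8(uint8_t a, uint8_t b) {
  uint16_t wide = uint16_t(a) * uint16_t(b);   // exact product
  return uint16_t(uint8_t(wide)) != wide;      // truncation lost bits
}

For example, mul_wraps_u8(16, 16) is true because 256 does not fit in 8 bits, just as a mismatch between the extended expressions signals possible overflow above.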
@@ -1962,20 +2070,36 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
/// hasLoopInvariantBackedgeTakenCount).
///
SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
- std::map<const Loop*, SCEVHandle>::iterator I = BackedgeTakenCounts.find(L);
- if (I == BackedgeTakenCounts.end()) {
+ // Initially insert a CouldNotCompute for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<std::map<const Loop*, SCEVHandle>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ if (Pair.second) {
SCEVHandle ItCount = ComputeBackedgeTakenCount(L);
- I = BackedgeTakenCounts.insert(std::make_pair(L, ItCount)).first;
if (ItCount != UnknownValue) {
assert(ItCount->isLoopInvariant(L) &&
"Computed trip count isn't loop invariant for loop!");
++NumTripCountsComputed;
+
+ // Now that we know the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they
+ // are only conservative estimates made without the benefit
+ // of trip count information.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ deleteValueFromRecords(PN);
+
+ // Update the value in the map.
+ Pair.first->second = ItCount;
} else if (isa<PHINode>(L->getHeader()->begin())) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
}
- return I->second;
+ return Pair.first->second;
}
/// forgetLoopBackedgeTakenCount - This method should be called by the
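getBackedgeTakenCount now uses an insert-before-compute caching pattern: the map is seeded with CouldNotCompute so that any recursive query triggered during the computation sees a conservative placeholder rather than recursing. The same shape in isolation (a minimal sketch with hypothetical names):

#include <map>
// Seed the cache with a sentinel before computing, so a recursive
// query for the same key finds the sentinel and backs off instead of
// recursing forever; the sentinel is overwritten once the real value
// is known. std::map iterators stay valid across later insertions.
static std::map<int, long> Cache;
long getOrCompute(int Key) {
  std::pair<std::map<int, long>::iterator, bool> P =
      Cache.insert(std::make_pair(Key, -1L));  // -1 plays CouldNotCompute
  if (P.second) {
    long Result = 2L * Key;                    // stand-in computation
    P.first->second = Result;                  // replace the sentinel
  }
  return P.first->second;
}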
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
new file mode 100644
index 0000000..2ba3f83
--- /dev/null
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -0,0 +1,244 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movz %t
+; RUN: not grep sar %t
+; RUN: not grep shl %t
+; RUN: grep add %t | count 6
+; RUN: grep inc %t | count 4
+; RUN: grep dec %t | count 2
+; RUN: grep lea %t | count 2
+
+; Optimize away zext-inreg and sext-inreg on the loop induction
+; variable using trip-count information.
+
+define void @count_up(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_up_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_down_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_up(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_up_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_down_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
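For reference when reading these tests: the `and` masks implement zero-extend-in-register and the shl/ashr pairs implement sign-extend-in-register. With a known trip count of 10 the induction variable never leaves the masked range, so all of them should fold away. Roughly, in C++ terms (a sketch; assumes the usual two's-complement arithmetic right shift):

#include <cstdint>
// zext-inreg: clear the high bits, as in `and i64 %indvar, 255`.
uint64_t zext_inreg_i8(uint64_t x) { return x & 0xFF; }
// sext-inreg: drop the top 8 bits and sign-extend from bit 55, as in
// the `shl i64 ..., 8` / `ashr i64 ..., 8` pairs.
int64_t sext_inreg_i56(uint64_t x) {
  return static_cast<int64_t>(x << 8) >> 8;
}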
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
new file mode 100644
index 0000000..7ccfe85
--- /dev/null
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -0,0 +1,386 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep and %t | count 6
+; RUN: grep movzb %t | count 6
+; RUN: grep sar %t | count 12
+
+; Don't optimize away zext-inreg and sext-inreg on the loop induction
+; variable, because it isn't safe to do so in these cases.
+
+define void @count_up(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 20
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_up_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @count_down_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 20
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_up(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_up_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @another_count_down_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @yet_another_count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @yet_another_count_up(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 3
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @still_another_count_down(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 3
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 3
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+ %s0 = shl i64 %indvar, 8
+ %indvar.i8 = ashr i64 %s0, 8
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %s1 = shl i64 %indvar, 24
+ %indvar.i24 = ashr i64 %s1, 24
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = sub i64 %indvar, 3
+ %exitcond = icmp eq i64 %indvar.next, 0
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+
+
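By contrast, these tests must keep the masks: each induction variable provably leaves the masked range (starting above the exit value and wrapping, stepping by 3, or running an unknown %n iterations), so folding the mask would change the computed index. A small hedged check of the failure mode:

#include <cassert>
#include <cstdint>
int main() {
  // In @count_up above, the IV starts at 10 and wraps through the full
  // 64-bit range before reaching 0, so it certainly exceeds 255 and
  // the 8-bit mask is not a no-op.
  uint64_t indvar = 300;              // a value the IV reaches
  assert((indvar & 255) != indvar);   // masking changes the index
  return 0;
}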
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index 06e2312..cb2f3aa 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpl \$8}
+; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpq \$8}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin9"
; happens after the relevant use, so the comparison stride can be
; easily changed.
-define void @foo() {
+define void @foo() nounwind {
entry:
br label %loop
@@ -14,9 +14,11 @@ loop:
%indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; <i32> [#uses=1]
%i.2.0.us1534 = add i32 %indvar, 1 ; <i32> [#uses=3]
%tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; <i32> [#uses=1]
- %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=0]
+ %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=1]
+ store i64 %tmp645646647.us1547, i64* null
%tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; <i1> [#uses=2]
- %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=0]
+ %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=1]
+ store i32 %tmp623.us1538, i32* null
br i1 %tmp611.us1535, label %exit, label %loop
exit:
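A plausible reading of this test update: the new stores give the previously dead sext and select results a use, so they survive until LSR runs; with the i64 value live, the exit comparison is rewritten on the 64-bit stride, hence the grep for cmpq instead of cmpl. The keep-alive idea in C++ terms (sink is a hypothetical name):

// A volatile sink plays the role of the `store ... null` lines above:
// it gives an otherwise-dead value a use the optimizer must preserve.
volatile long long sink;
void keep_alive(long long v) { sink = v; }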