diff options
-rw-r--r-- | lib/Analysis/README.txt | 12 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/lsr-delayed-fold.ll | 28 |
3 files changed, 44 insertions, 1 deletion
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt index c401090..88ea9f1 100644 --- a/lib/Analysis/README.txt +++ b/lib/Analysis/README.txt @@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic, which is very inefficient when expanded into code. //===---------------------------------------------------------------------===// + +In test/CodeGen/X86/lsr-delayed-fold.ll, + +ScalarEvolution is forming this expression: + +((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) + +This could be folded to + +(-1 * (trunc i64 undef to i32)) + +//===---------------------------------------------------------------------===// diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a09bca8..a09b3dc 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2060,8 +2060,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, LU.Kind, LU.AccessTy, TLI, SE)) continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; Formula F = Base; - F.BaseRegs[i] = SE.getAddExpr(InnerAddOps); + F.BaseRegs[i] = InnerSum; F.BaseRegs.push_back(*J); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll new file mode 100644 index 0000000..f160c2d --- /dev/null +++ b/test/CodeGen/X86/lsr-delayed-fold.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=x86-64 < %s > /dev/null +; rdar://7886751 + +; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y), +; but LSR should tolerate this. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +define fastcc void @formatValue(i64 %arg5) nounwind { +bb12: ; preds = %bb11 + %t = trunc i64 %arg5 to i32 ; <i32> [#uses=1] + %t13 = sub i64 0, %arg5 ; <i64> [#uses=1] + %t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1] + br label %bb15 + +bb15: ; preds = %bb21, %bb12 + %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2] + %t17 = mul i64 %t14, %t16 ; <i64> [#uses=1] + %t18 = add i64 undef, %t17 ; <i64> [#uses=1] + %t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1] + %t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1] + %t23 = add i64 %t16, 1 ; <i64> [#uses=1] + br i1 %t22, label %bb24, label %bb15 + +bb24: ; preds = %bb21, %bb11 + unreachable +} |