Switch this code away from Value::isUsedInBasicBlock. That code either

loops over instructions in the basic block or the use-def list of the value, neither of which are really efficient when repeatedly querying about values in the same basic block. What's more, we already know that the CondBB is small, and so we can do a much more efficient test by counting the uses in CondBB, and seeing if those account for all of the uses. Finally, we shouldn't blanket fail on any such instruction, instead we should conservatively assume that those instructions are part of the cost. Note that this actually fixes a bug in the pass because isUsedInBasicBlock has a really terrible bug in it. I'll fix that in my next commit, but the fix for it would make this code suddenly take the compile time hit I thought it already was taking, so I wanted to go ahead and migrate this code to a faster & better pattern. The bug in isUsedInBasicBlock was also causing other tests to test the wrong thing entirely: for example we weren't actually disabling speculation for floating point operations as intended (and tested), but the test passed because we failed to speculate them due to the isUsedInBasicBlock failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173417 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chandler Carruth <chandlerc@gmail.com> 2013-01-25 05:40:09 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2013-01-25 05:40:09 +0000
commit: baf868b9b8d187744d183d57ef3cbb2a44ca047a (patch)
tree: 7ff2db6f003ee942529afbe365b9466c7e7124b4
parent: 178f7d08a41f2e9432b96cd27f0c8ea42fa0ac9e (diff)
download: external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.zip
external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.tar.gz
external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.tar.bz2
2 files changed, 92 insertions, 7 deletions
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 9f3464d..3cf3984 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1388,6 +1388,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
   }
   assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
 
+  // Keep a count of how many times instructions are used within CondBB when
+  // they are candidates for sinking into CondBB. Specifically:
+  // - They are defined in BB, and
+  // - They have no side effects, and
+  // - All of their uses are in CondBB.
+  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
   unsigned SpeculationCost = 0;
   for (BasicBlock::iterator BBI = ThenBB->begin(),
                             BBE = llvm::prior(ThenBB->end());
@@ -1406,9 +1413,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
     // Don't hoist the instruction if it's unsafe or expensive.
     if (!isSafeToSpeculativelyExecute(I))
       return false;
-    // FIXME: This should really be a cost metric, but our cost model doesn't
-    // accurately model the expense of select.
-    if (isa<SelectInst>(I))
+    // FIXME: These should really be cost metrics, but our cost model doesn't
+    // accurately model the expense of selects and floating point operations.
+    // FIXME: Is it really safe to speculate floating point operations?
+    // Signaling NaNs break with this, but we shouldn't care, right?
+    if (isa<SelectInst>(I) || I->getType()->isFPOrFPVectorTy())
       return false;
     // FIXME: The cost metric currently doesn't reason accurately about simple
     // versus complex GEPs, take a conservative approach here.
@@ -1422,13 +1431,26 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
     for (User::op_iterator i = I->op_begin(), e = I->op_end();
          i != e; ++i) {
       Instruction *OpI = dyn_cast<Instruction>(*i);
-      if (OpI && OpI->getParent() == BB &&
-          !OpI->mayHaveSideEffects() &&
-          !OpI->isUsedInBasicBlock(BB))
-        return false;
+      if (!OpI || OpI->getParent() != BB ||
+          OpI->mayHaveSideEffects())
+        continue; // Not a candidate for sinking.
+
+      ++SinkCandidateUseCounts[OpI];
     }
   }
 
+  // Consider any sink candidates which are only used in CondBB as costs for
+  // speculation. Note, while we iterate over a DenseMap here, we are summing
+  // and so iteration order isn't significant.
+  for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+           SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+       I != E; ++I)
+    if (I->first->getNumUses() == I->second) {
+      SpeculationCost += TTI.getUserCost(I->first);
+      if (SpeculationCost > TargetTransformInfo::TCC_Basic)
+        return false;
+    }
+
   // Check that the PHI nodes can be converted to selects.
   bool HaveRewritablePHIs = false;
   for (BasicBlock::iterator I = EndBB->begin();
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index bcef848..b60e5dc 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -137,3 +137,66 @@ end:
   ret i16 %x
 }
 
+define i16 @test6(i1* %dummy, i64 %a, i64 %b) {
+; Test that we speculate no-op instructions when those instructions are in the
+; predecessor but could potentially be sunk.
+; CHECK: @test6
+
+entry:
+  %cond1 = load volatile i1* %dummy
+  %a.conv = trunc i64 %a to i16
+  %b.conv = trunc i64 %b to i16
+  br i1 %cond1, label %if, label %end
+
+if:
+  %cond2 = load volatile i1* %dummy
+  %cond3 = load volatile i1* %dummy
+  %cond4 = load volatile i1* %dummy
+  %cmp = icmp ult i16 %a.conv, %b.conv
+  %a.conv2 = trunc i64 %a to i32
+  %b.conv2 = trunc i64 %b to i32
+  br i1 %cond2, label %then, label %end
+
+then:
+  %sub = sub i32 %a.conv2, %b.conv2
+  %sub.conv = trunc i32 %sub to i16
+  br label %end
+
+end:
+  %x = phi i16 [ %a.conv, %entry ], [ %b.conv, %if ], [ %sub.conv, %then ]
+; CHECK-NOT: phi
+; CHECK: select i1
+
+  ret i16 %x
+}
+
+define i16 @test7(i1* %dummy, i16 %a, i16 %b, i32 %x) {
+; Test that we don't speculate when there are instructions that could
+; potentially sink into the conditional block.
+; CHECK: @test7
+
+entry:
+  %cond1 = load volatile i1* %dummy
+  br i1 %cond1, label %if, label %end
+
+if:
+  %cond2 = load volatile i1* %dummy
+  %a.conv = sext i16 %a to i32
+  %b.conv = sext i16 %b to i32
+  %cmp = icmp ult i32 %a.conv, %b.conv
+  %a.conv2 = add i32 %a.conv, %x
+  br i1 %cond2, label %then, label %end
+
+then:
+  %sub = sub i32 %a.conv2, %b.conv
+  %sub.conv = trunc i32 %sub to i16
+  br label %end
+
+end:
+  %y = phi i16 [ %a, %entry ], [ %b, %if ], [ %sub.conv, %then ]
+; CHECK-NOT: select
+; CHECK: phi i16
+
+  ret i16 %y
+}
+
author	Chandler Carruth <chandlerc@gmail.com>	2013-01-25 05:40:09 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2013-01-25 05:40:09 +0000
commit	baf868b9b8d187744d183d57ef3cbb2a44ca047a (patch)
tree	7ff2db6f003ee942529afbe365b9466c7e7124b4
parent	178f7d08a41f2e9432b96cd27f0c8ea42fa0ac9e (diff)
download	external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.zip external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.tar.gz external_llvm-baf868b9b8d187744d183d57ef3cbb2a44ca047a.tar.bz2