summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2011-01-04 07:46:33 +0000
committerChris Lattner <sabre@nondot.org>2011-01-04 07:46:33 +0000
commite41d3c015ce5cafde305d9a6d9baaae1c41bf46a (patch)
tree72c2cb4742fdf2d7ee5d0fb96df8911bcba0e7a0
parentb7e9ef0ed1e246bd64d97a768555c8334c0d86e9 (diff)
downloadexternal_llvm-e41d3c015ce5cafde305d9a6d9baaae1c41bf46a.zip
external_llvm-e41d3c015ce5cafde305d9a6d9baaae1c41bf46a.tar.gz
external_llvm-e41d3c015ce5cafde305d9a6d9baaae1c41bf46a.tar.bz2
Teach loop-idiom to turn a loop containing a memset into a larger memset
when safe. The testcase is basically this nested loop: void foo(char *X) { for (int i = 0; i != 100; ++i) for (int j = 0; j != 100; ++j) X[j+i*100] = 0; } which gets turned into a single memset now. clang -O3 doesn't optimize this yet though due to a phase ordering issue I haven't analyzed yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122806 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp87
-rw-r--r--test/Transforms/LoopIdiom/basic.ll33
2 files changed, 102 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d67f6c1..1fe5c4f 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -39,6 +39,7 @@
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -72,9 +73,11 @@ namespace {
SmallVectorImpl<BasicBlock*> &ExitBlocks);
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- bool processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
- Value *SplatValue,
+ bool processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
const SCEVAddRecExpr *Ev,
const SCEV *BECount);
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
@@ -210,27 +213,39 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
Instruction *Inst = I++;
// Look for store instructions, which may be optimized to memset/memcpy.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->isVolatile()) continue;
-
WeakVH InstPtr(I);
if (!processLoopStore(SI, BECount)) continue;
MadeChange = true;
// If processing the store invalidated our iterator, start over from the
- // head of the loop.
+ // top of the block.
if (InstPtr == 0)
I = BB->begin();
continue;
}
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
}
return MadeChange;
}
-/// scanBlock - Look over a block to see if we can promote anything out of it.
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -261,8 +276,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// turned into a memset of i8 -1, assuming that all the consequtive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset.
if (Value *SplatValue = isBytewiseValue(StoredVal))
- if (processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, StoreEv,
- BECount))
+ if (processLoopStoreOfSplatValue(StorePtr, StoreSize, SI->getAlignment(),
+ SplatValue, SI, StoreEv, BECount))
return true;
// If the stored value is a strided load in the same loop with the same stride
@@ -281,13 +296,48 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
return false;
}
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday, that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStoreOfSplatValue(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
Loop *L, const SCEV *BECount,
unsigned StoreSize, AliasAnalysis &AA,
- StoreInst *IgnoredStore) {
+ Instruction *IgnoredStore) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -317,8 +367,9 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
/// processLoopStoreOfSplatValue - We see a strided store of a memsetable value.
/// If we can transform this into a memset in the loop preheader, do so.
bool LoopIdiomRecognize::
-processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
- Value *SplatValue,
+processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *SplatValue,
+ Instruction *TheStore,
const SCEVAddRecExpr *Ev, const SCEV *BECount) {
// Verify that the stored value is loop invariant. If not, we can't promote
// the memset.
@@ -329,9 +380,9 @@ processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for an alias.
- if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), SI))
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
return false;
// Okay, everything looks good, insert the memset.
@@ -344,14 +395,14 @@ processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
// header. Just insert code for it in the preheader.
SCEVExpander Expander(*SE);
- unsigned AddrSpace = SI->getPointerAddressSpace();
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ const Type *IntPtr = TD->getIntPtrType(SplatValue->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -364,15 +415,15 @@ processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
Value *NewCall =
- Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, SI->getAlignment());
+ Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
- << " from store to: " << *Ev << " at: " << *SI << "\n");
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
(void)NewCall;
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- DeleteDeadInstruction(SI, *SE);
+ DeleteDeadInstruction(TheStore, *SE);
++NumMemSet;
return true;
}
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index f3b55d7..a94aaf1 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -240,3 +240,36 @@ for.end: ; preds = %for.body, %entry
; CHECK: ret void
}
+; Two dimensional nested loop should be promoted to one big memset.
+define void @test10(i8* %X) nounwind ssp {
+entry:
+ br label %bb.nph
+
+bb.nph: ; preds = %entry, %for.inc10
+ %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
+ br label %for.body5
+
+for.body5: ; preds = %for.body5, %bb.nph
+ %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
+ %mul = mul nsw i32 %i.04, 100
+ %add = add nsw i32 %j.02, %mul
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %inc = add nsw i32 %j.02, 1
+ %cmp4 = icmp eq i32 %inc, 100
+ br i1 %cmp4, label %for.inc10, label %for.body5
+
+for.inc10: ; preds = %for.body5
+ %inc12 = add nsw i32 %i.04, 1
+ %cmp = icmp eq i32 %inc12, 100
+ br i1 %cmp, label %for.end13, label %bb.nph
+
+for.end13: ; preds = %for.inc10
+ ret void
+; CHECK: @test10
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}