diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-06-22 01:18:16 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-06-22 01:18:16 +0000 |
commit | 4d54e5b2dd4a3d3bed38ff9c7aa57fc66adb5855 (patch) | |
tree | 8ffb7495b6a7d723ec73603ac30feef7874e54a0 | |
parent | 7e1d742e1a868871d7c35d3880d8aa5d6d5d21e2 (diff) | |
download | external_llvm-4d54e5b2dd4a3d3bed38ff9c7aa57fc66adb5855.zip external_llvm-4d54e5b2dd4a3d3bed38ff9c7aa57fc66adb5855.tar.gz external_llvm-4d54e5b2dd4a3d3bed38ff9c7aa57fc66adb5855.tar.bz2 |
Tail merging pass shall not break up IT blocks. rdar://8115404
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106517 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Target/TargetInstrInfo.h | 8 | ||||
-rw-r--r-- | lib/CodeGen/BranchFolding.cpp | 23 | ||||
-rw-r--r-- | lib/CodeGen/BranchFolding.h | 5 | ||||
-rw-r--r-- | lib/CodeGen/TargetInstrInfoImpl.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2ITBlockPass.cpp | 19 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2InstrInfo.cpp | 16 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2InstrInfo.h | 10 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll | 127 |
8 files changed, 190 insertions, 20 deletions
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 90608f1..58d812b 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -327,6 +327,14 @@ public: /// used by the tail merging pass. virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const = 0; + + /// isLegalToSplitMBBAt - Return true if it's legal to split the given basic + /// block at the specified instruction (i.e. instruction would be the start + /// of a new basic block). + virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const { + return true; + } /// copyRegToReg - Emit instructions to copy between a pair of registers. It /// returns false if the target does not how to copy between the specified diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index e17e47a..7f98df0 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -370,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. @@ -614,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. -unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength) { - unsigned commonTailIndex = 0; +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; unsigned TimeEstimate = ~0U; for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. @@ -644,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + SameTails[commonTailIndex].setBlock(newMBB); SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); @@ -651,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, if (PredBB == MBB) PredBB = newMBB; - return commonTailIndex; + return true; } // See if any of the blocks in MergePotentials (which all have a common single @@ -746,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index b087395..15dfa7f 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -102,8 +102,9 @@ namespace llvm { MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 53f3ee8..5e145cf 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -28,6 +28,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. void TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const { diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 52ab71a..57f8eec 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -62,13 +62,6 @@ namespace { char Thumb2ITBlockPass::ID = 0; } -static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) - return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); -} - bool Thumb2ITBlockPass::MoveCPSRUseUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -82,7 +75,7 @@ Thumb2ITBlockPass::MoveCPSRUseUp(MachineBasicBlock &MBB, for (unsigned i = 0; i < 4; ++i) { MachineInstr *MI = &*I; unsigned MPredReg = 0; - ARMCC::CondCodes MCC = getPredicate(MI, MPredReg); + ARMCC::CondCodes MCC = llvm::getITInstrPredicate(MI, MPredReg); if (MCC != ARMCC::AL) { if (MPredReg != PredReg || (MCC != CC && MCC != OCC)) return false; @@ -209,7 +202,7 @@ bool Thumb2ITBlockPass::InsertITBlock(MachineInstr *First, MachineInstr *Last) { return false; unsigned PredReg = 0; - ARMCC::CondCodes CC = getPredicate(First, PredReg); + ARMCC::CondCodes CC = llvm::getITInstrPredicate(First, PredReg); if (CC == ARMCC::AL) return Modified; @@ -222,7 +215,7 @@ bool Thumb2ITBlockPass::InsertITBlock(MachineInstr *First, MachineInstr *Last) { return Modified; MachineInstr *NMI = &*MBBI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); if (NCC != CC && NCC != OCC) { if (NCC != ARMCC::AL) return Modified; @@ -321,7 +314,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, while (I != E && I->isDebugValue()) ++I; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = getPredicate(I, NPredReg); + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); if (NCC == CC || NCC == OCC) return true; } @@ -339,7 +332,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - ARMCC::CondCodes CC = getPredicate(MI, PredReg); + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; @@ -375,7 +368,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MI = NMI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 866ffb1..42fe509 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -88,6 +88,14 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, } bool +Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const { + unsigned PredReg = 0; + return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; +} + + +bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, @@ -605,3 +613,11 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, MBB->insert(++MBBI, SrcMI); } } + +ARMCC::CondCodes +llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) + return ARMCC::AL; + return llvm::getInstrPredicate(MI, PredReg); +} diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index d5fc359..8b01bcb 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -35,6 +35,9 @@ public: void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const; + bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const; + bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, @@ -68,6 +71,13 @@ public: ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const; }; + +/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical +/// to llvm::getInstrPredicate except it returns AL for conditional branch +/// instructions which are "predicated", but are not in IT blocks. +ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg); + + } #endif // THUMB2INSTRUCTIONINFO_H diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll new file mode 100644 index 0000000..c5fc509 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll @@ -0,0 +1,127 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s +; rdar://8115404 +; Tail merging must not split an IT block. + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct._RuneCharClass = type { [14 x i8], i32 } +%struct._RuneEntry = type { i32, i32, i32, i32* } +%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* } +%struct._RuneRange = type { i32, %struct._RuneEntry* } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } + +@finput = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] +@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=0] +@token_buffer = external global [1025 x i8], align 4 ; <[1025 x i8]*> [#uses=1] +@.str73 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str174 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str275 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str376 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str477 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str578 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str679 = external constant [7 x i8], align 4 ; <[7 x i8]*> [#uses=0] +@.str780 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str881 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str982 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str1083 = external constant [9 x i8], align 4 ; <[9 x i8]*> [#uses=0] +@.str1184 = external constant [7 x i8], align 4 ; <[7 x i8]*> [#uses=0] +@.str1285 = external constant [16 x i8], align 4 ; <[16 x i8]*> [#uses=0] +@.str1386 = external constant [12 x i8], align 4 ; <[12 x i8]*> [#uses=0] +@.str1487 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@llvm.used = external global [1 x i8*] ; <[1 x i8*]*> [#uses=0] + +define fastcc i32 @parse_percent_token() nounwind { +entry: +; CHECK: ittt eq +; CHECK: ittt eq +; CHECK: ittt eq +; CHECK: ittt eq +; CHECK: ittt eq +; CHECK: moveq r0 +; CHECK-NOT: LBB0_ +; CHECK: ldreq +; CHECK: popeq + switch i32 undef, label %bb7 [ + i32 37, label %bb43 + i32 48, label %bb5 + i32 50, label %bb4 + i32 60, label %bb2 + i32 61, label %bb6 + i32 62, label %bb3 + i32 123, label %bb1 + ] + +bb1: ; preds = %entry + ret i32 8 + +bb2: ; preds = %entry + ret i32 15 + +bb3: ; preds = %entry + ret i32 16 + +bb4: ; preds = %entry + ret i32 17 + +bb5: ; preds = %entry + ret i32 9 + +bb6: ; preds = %entry + ret i32 18 + +bb7: ; preds = %entry + br i1 undef, label %bb.i.i, label %bb1.i.i + +bb.i.i: ; preds = %bb7 + br i1 undef, label %bb43, label %bb12 + +bb1.i.i: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb.i.i2 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + br label %bb11 + +bb11: ; preds = %bb10, %bb9 + %p.0 = phi i8* [ undef, %bb10 ], [ %p.1, %bb9 ] ; <i8*> [#uses=1] + %0 = load %struct.FILE** @finput, align 4 ; <%struct.FILE*> [#uses=1] + %1 = tail call i32 @getc(%struct.FILE* %0) nounwind ; <i32> [#uses=0] + br label %bb12 + +bb12: ; preds = %bb11, %bb.i.i + %p.1 = phi i8* [ %p.0, %bb11 ], [ getelementptr inbounds ([1025 x i8]* @token_buffer, i32 0, i32 0), %bb.i.i ] ; <i8*> [#uses=2] + %2 = icmp ult i32 undef, 128 ; <i1> [#uses=1] + br i1 %2, label %bb.i.i2, label %bb1.i.i3 + +bb.i.i2: ; preds = %bb12 + %3 = load i32* null, align 4 ; <i32> [#uses=1] + %4 = lshr i32 %3, 8 ; <i32> [#uses=1] + %.lobit.i1 = and i32 %4, 1 ; <i32> [#uses=1] + %.not = icmp ne i32 %.lobit.i1, 0 ; <i1> [#uses=1] + %or.cond = or i1 %.not, undef ; <i1> [#uses=1] + br i1 %or.cond, label %bb9, label %bb14 + +bb1.i.i3: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb.i.i2 + store i8 0, i8* %p.1, align 1 + br i1 undef, label %bb43, label %bb15 + +bb15: ; preds = %bb14 + unreachable + +bb43: ; preds = %bb14, %bb.i.i, %entry + %.0 = phi i32 [ 7, %entry ], [ 24, %bb.i.i ], [ 9, %bb14 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare i32 @getc(%struct.FILE* nocapture) nounwind + +declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly + +declare i32 @__maskrune(i32, i32) + +declare i32 @ungetc(i32, %struct.FILE* nocapture) nounwind |