summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-08-10 19:30:19 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-08-10 19:30:19 +0000
commitac096808a3accc516ae7c193c9a2c1392bf3301a (patch)
tree7add14add83e0e065548a9ad801a591251ca6a83
parent98197e55c10176c3ef9100f7d852abbd2347225f (diff)
downloadexternal_llvm-ac096808a3accc516ae7c193c9a2c1392bf3301a.zip
external_llvm-ac096808a3accc516ae7c193c9a2c1392bf3301a.tar.gz
external_llvm-ac096808a3accc516ae7c193c9a2c1392bf3301a.tar.bz2
Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object.
Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110707 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp89
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp9
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h11
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td16
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp11
-rw-r--r--test/CodeGen/Thumb/large-stack.ll29
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll53
7 files changed, 148 insertions, 70 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ee1a82e..12308c4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -177,7 +177,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
Reserved.set(FramePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
@@ -194,7 +194,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -511,7 +511,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -540,7 +540,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -610,6 +610,10 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
@@ -683,6 +687,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -708,7 +713,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -860,8 +868,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF)))
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@@ -881,9 +890,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -976,7 +983,7 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
@@ -1546,7 +1553,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1565,7 +1573,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1577,11 +1585,14 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
@@ -1614,7 +1625,14 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
}
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp.
+ if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -1669,34 +1687,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index db550b1..c3d60ad 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -742,14 +742,15 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
case ARM::tGPRRegClassID:
- return 5 - FPDiff;
- case ARM::GPRRegClassID:
- return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
case ARM::DPRRegClassID:
return 32 - 10;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1c..514c26b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been for spilled to
/// enable far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 5e17175..2b72739 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -294,8 +294,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@@ -312,8 +311,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
}
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@@ -403,8 +401,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_rGPRAO + (sizeof(THUMB_rGPRAO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@@ -421,8 +418,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_rGPRAO_1 + (sizeof(ARM_rGPRAO_1)/sizeof(unsigned));
}
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@@ -449,11 +445,9 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
tGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
tGPRClass::iterator I =
THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 9ae3145..50328e3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -742,11 +742,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@@ -764,10 +764,9 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -828,7 +827,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 02de36a..b05e6bf 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,20 +1,35 @@
-; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #256
+; CHECK: add sp, #256
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
define void @test2() {
+; CHECK: test2:
+; CHECK: ldr r0, LCPI
+; CHECK: add sp, r0
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
- %retval = alloca i32, align 4
- %tmp = alloca i32, align 4
- %a = alloca [805306369 x i8], align 16
- store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
- ret i32 %tmp1
+; CHECK: test3:
+; CHECK: ldr r1, LCPI
+; CHECK: add sp, r1
+; CHECK: ldr r1, LCPI
+; CHECK: add r1, sp
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
new file mode 100644
index 0000000..abcf13a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
+
+@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1]
+
+define internal fastcc i32 @Callee(i32 %i) nounwind {
+entry:
+; CHECK: Callee:
+ %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb2, label %bb
+
+bb: ; preds = %entry
+ %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1]
+ %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
+ %3 = load i8* %.sub, align 4 ; <i8> [#uses=1]
+ %4 = sext i8 %3 to i32 ; <i32> [#uses=1]
+ ret i32 %4
+
+bb2: ; preds = %entry
+; Must restore sp from fp here
+; CHECK: mov sp, r7
+; CHECK: sub sp, #8
+; CHECK: pop
+ ret i32 0
+}
+
+declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind
+
+define i32 @main() nounwind {
+; CHECK: main:
+bb.nph:
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2]
+ %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1]
+ %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1]
+ %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2]
+ %3 = add nsw i32 %0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb2, label %bb
+
+bb2: ; preds = %bb
+; No need to restore sp from fp here.
+; CHECK: printf
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub sp, #12
+; CHECK: pop
+ %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind