-rw-r--r--  lib/Target/X86/CMakeLists.txt          1
-rw-r--r--  lib/Target/X86/X86.h                   5
-rw-r--r--  lib/Target/X86/X86.td                  3
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp      251
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp        1
-rw-r--r--  lib/Target/X86/X86Subtarget.h          4
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp    5
-rw-r--r--  test/CodeGen/X86/atom-fixup-lea1.ll   38
-rw-r--r--  test/CodeGen/X86/atom-fixup-lea2.ll   84
-rw-r--r--  test/CodeGen/X86/atom-fixup-lea3.ll   51
-rw-r--r--  test/CodeGen/X86/lsr-static-addr.ll    2
11 files changed, 444 insertions(+), 1 deletion(-)
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index d14899d..7cb71f0 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -33,6 +33,7 @@ set(sources
X86TargetObjectFile.cpp
X86TargetTransformInfo.cpp
X86VZeroUpper.cpp
+ X86FixupLEAs.cpp
)
if( CMAKE_CL_64 )
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1f9919f..947002f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
/// createX86PadShortFunctions - Return a pass that pads short functions
/// with NOOPs. This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
+/// createX86FixupLEAs - Return a pass that selectively replaces
+/// certain instructions (like add, sub, inc, dec, some shifts,
+/// and some multiplies) by equivalent LEA instructions, in order
+/// to eliminate execution delays in some Atom processors.
+FunctionPass *createX86FixupLEAs();
} // End llvm namespace
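
The substitutions this new pass performs look like the following sketch
(AT&T syntax; the instructions are illustrative, not taken from the patch).
Note that LEA computes the same value without touching EFLAGS, so the
rewrite is only safe where the flags result of the arithmetic instruction
is dead:

    addl  $8, %ecx          # ALU add: writes flags, result at EX stage
    # may be rewritten as
    leal  8(%ecx), %ecx     # LEA: same value, no flags, executes at AG
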
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 306e3ac..87bb68d 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;
+def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+ "LEA instruction needs inputs at AG stage">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
FeatureSlowDivide,
FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
// "Arrandale" along with corei3 and corei5
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
new file mode 100644
index 0000000..82e6de4
--- /dev/null
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -0,0 +1,251 @@
+//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will find instructions which
+// can be re-written as LEA instructions in order to reduce pipeline
+// delays for some models of the Intel Atom family.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-fixup-LEAs"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumLEAs, "Number of LEA instructions created");
+
+namespace {
+ class FixupLEAPass : public MachineFunctionPass {
+ enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+ static char ID;
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+
+ virtual const char *getPassName() const { return "X86 Atom LEA Fixup";}
+ void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+ void processInstruction(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+ RegUsageState usesRegister(MachineOperand& p,
+ MachineBasicBlock::iterator I);
+ MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+ MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ public:
+ FixupLEAPass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII; // Machine instruction info.
+ LiveVariables *LV;
+
+ };
+ char FixupLEAPass::ID = 0;
+}
+
+/// postRAConvertToLEA - if an instruction can be converted to an
+/// equivalent LEA, insert the new instruction into the basic block
+/// and return a pointer to it. Otherwise, return zero.
+MachineInstr *
+FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr* MI = MBBI;
+ MachineInstr* NewMI;
+ switch (MI->getOpcode()) {
+ case X86::MOV32rr:
+ case X86::MOV64rr: {
+ const MachineOperand& Src = MI->getOperand(1);
+ const MachineOperand& Dest = MI->getOperand(0);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(),
+ TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r))
+ .addOperand(Dest)
+ .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (!MI->getOperand(2).isImm()) {
+ // convertToThreeAddress will call getImm()
+ // which requires isImm() to be true
+ return 0;
+ }
+ }
+ return TII->convertToThreeAddress(MFI, MBBI, LV);
+}
+
+FunctionPass *llvm::createX86FixupLEAs() {
+ return new FixupLEAPass();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks,
+/// replacing instructions by equivalent LEA instructions
+/// if needed and when possible.
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = Func.getTarget().getInstrInfo();
+ TM = &MF->getTarget();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+
+ DEBUG(dbgs() << "Start X86FixupLEAs\n";);
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
+ processBasicBlock(Func, I);
+ DEBUG(dbgs() << "End X86FixupLEAs\n";);
+
+ return true;
+}
+
+/// usesRegister - Determine if an instruction references a machine register
+/// and, if so, whether it reads or writes the register.
+FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
+ MachineBasicBlock::iterator I) {
+ RegUsageState RegUsage = RU_NotUsed;
+ MachineInstr* MI = I;
+
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand& opnd = MI->getOperand(i);
+ if (opnd.isReg() && opnd.getReg() == p.getReg()){
+ if (opnd.isDef())
+ return RU_Write;
+ RegUsage = RU_Read;
+ }
+ }
+ return RegUsage;
+}
+
+/// getPreviousInstr - Given an iterator to an instruction in a basic
+/// block, step it back to the previous instruction in the block,
+/// wrapping around to the last instruction of the block if the block
+/// branches to itself. Returns false when no previous instruction exists.
+static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ if (I == MFI->begin()) {
+ if (MFI->isPredecessor(MFI)) {
+ I = --MFI->end();
+ return true;
+ }
+ else
+ return false;
+ }
+ --I;
+ return true;
+}
+
+/// searchBackwards - Step backwards through a basic block, looking
+/// for an instruction which writes the given register within
+/// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
+MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ int InstrDistance = 1;
+ MachineBasicBlock::iterator CurInst;
+ static const int INSTR_DISTANCE_THRESHOLD = 5;
+
+ CurInst = I;
+ bool Found;
+ Found = getPreviousInstr(CurInst, MFI);
+ while( Found && I != CurInst) {
+ if (CurInst->isCall() || CurInst->isInlineAsm())
+ break;
+ if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
+ break; // too far back to make a difference
+ if (usesRegister(p, CurInst) == RU_Write){
+ return CurInst;
+ }
+ InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
+ Found = getPreviousInstr(CurInst, MFI);
+ }
+ return 0;
+}
+
+/// processInstruction - Given a memory access or LEA instruction
+/// whose address mode uses a base and/or index register, look for
+/// an opportunity to replace the instruction which sets the base or index
+/// register with an equivalent LEA instruction.
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ // Process a load, store, or LEA instruction.
+ MachineInstr *MI = I;
+ int opcode = MI->getOpcode();
+ const MCInstrDesc& Desc = MI->getDesc();
+ int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode);
+ if (AddrOffset >= 0) {
+ AddrOffset += X86II::getOperandBias(Desc);
+ MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
+ if (p.isReg() && p.getReg() != X86::ESP) {
+ seekLEAFixup(p, I, MFI);
+ }
+ MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+ if (q.isReg() && q.getReg() != X86::ESP) {
+ seekLEAFixup(q, I, MFI);
+ }
+ }
+}
+
+/// seekLEAFixup - Given a machine register, look for the instruction
+/// which writes it in the current basic block. If found,
+/// try to replace it with an equivalent LEA instruction.
+/// If replacement succeeds, then also process the newly created
+/// instruction.
+void FixupLEAPass::seekLEAFixup(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
+ if (MBI) {
+ MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI, LV);
+ if (NewMI) {
+ ++NumLEAs;
+ DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
+ // now to replace with an equivalent LEA...
+ DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
+ MFI->erase(MBI);
+ MachineBasicBlock::iterator J =
+ static_cast<MachineBasicBlock::iterator> (NewMI);
+ processInstruction(J, MFI);
+ }
+ }
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// replacing adds and shifts with LEA instructions, where appropriate.
+bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
+ MachineFunction::iterator MFI) {
+
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ processInstruction(I, MFI);
+ return false;
+}
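
A sketch of the end-to-end transformation on the pattern the pass targets,
mirroring the atom-fixup-lea1.ll test below; the registers are hypothetical
and the rewrite assumes the add's flags are unused:

    addl  $4, %esi          # writes the base register of the load
    movl  (%esi), %eax      # the load consumes %esi at the AG stage
    # after searchBackwards finds the add within the latency threshold
    # and convertToThreeAddress succeeds, this becomes:
    leal  4(%esi), %esi
    movl  (%esi), %eax
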
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 14619b6..448d2e6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -467,6 +467,7 @@ void X86Subtarget::initializeEnvironment() {
PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
+ LEAUsesAG = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6fbdb1d..66832b9 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,6 +165,9 @@ protected:
/// CallRegIndirect - True if the Calls with memory reference should be converted
/// to a register-based indirect call.
bool CallRegIndirect;
+ /// LEAUsesAG - True if the LEA instruction inputs have to be ready at
+ /// address generation (AG) time.
+ bool LEAUsesAG;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -278,6 +281,7 @@ public:
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
+ bool LEAusesAG() const { return LEAUsesAG; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8aa58a2..00fa47f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() {
addPass(createX86PadShortFunctions());
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().LEAusesAG()){
+ addPass(createX86FixupLEAs());
+ ShouldPrint = true;
+ }
return ShouldPrint;
}
diff --git a/test/CodeGen/X86/atom-fixup-lea1.ll b/test/CodeGen/X86/atom-fixup-lea1.ll
new file mode 100644
index 0000000..4651bf2
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea1.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl
+; CHECK-NEXT:leal
+; CHECK-NEXT:decl
+; CHECK-NEXT:jne
+
+; Test for the FixupLEAs pre-emit pass. An LEA should be substituted for the ADD
+; that increments the array pointer because it is within 5 instructions of the
+; corresponding load. The ADD precedes the load along the loop back edge.
+
+; Original C code
+;int test(int n, int * array)
+;{
+; int sum = 0;
+; for(int i = 0; i < n; i++)
+; sum += array[i];
+; return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array) {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %array, i32 %i.06
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.05
+ %inc = add nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
new file mode 100644
index 0000000..1855ea1
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK:BB#5
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:movl
+
+
+; Test for fixup lea pre-emit pass. LEA instructions should be substituted for
+; ADD instructions which compute the address and index of the load because they
+; precede the load within 5 instructions. An LEA should also be substituted for
+; an ADD which computes part of the index because it precedes the index LEA
+; within 5 instructions; this substitution is referred to as backwards chaining.
+
+; Original C Code
+;struct node_t
+;{
+; int k, m, n, p;
+; int * array;
+;};
+
+;extern struct node_t getnode();
+
+;int test()
+;{
+; int sum = 0;
+; struct node_t n = getnode();
+; if(n.array != 0 && n.p > 0 && n.k > 0 && n.n > 0 && n.m > 0) {
+; sum = ((int*)((int)n.array + n.p) )[ n.k + n.m + n.n ];
+; }
+; return sum;
+;}
+
+%struct.node_t = type { i32, i32, i32, i32, i32* }
+
+define i32 @test() {
+entry:
+ %n = alloca %struct.node_t, align 4
+ call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n)
+ %array = getelementptr inbounds %struct.node_t* %n, i32 0, i32 4
+ %0 = load i32** %array, align 4
+ %cmp = icmp eq i32* %0, null
+ br i1 %cmp, label %if.end, label %land.lhs.true
+
+land.lhs.true:
+ %p = getelementptr inbounds %struct.node_t* %n, i32 0, i32 3
+ %1 = load i32* %p, align 4
+ %cmp1 = icmp sgt i32 %1, 0
+ br i1 %cmp1, label %land.lhs.true2, label %if.end
+
+land.lhs.true2:
+ %k = getelementptr inbounds %struct.node_t* %n, i32 0, i32 0
+ %2 = load i32* %k, align 4
+ %cmp3 = icmp sgt i32 %2, 0
+ br i1 %cmp3, label %land.lhs.true4, label %if.end
+
+land.lhs.true4:
+ %n5 = getelementptr inbounds %struct.node_t* %n, i32 0, i32 2
+ %3 = load i32* %n5, align 4
+ %cmp6 = icmp sgt i32 %3, 0
+ br i1 %cmp6, label %land.lhs.true7, label %if.end
+
+land.lhs.true7:
+ %m = getelementptr inbounds %struct.node_t* %n, i32 0, i32 1
+ %4 = load i32* %m, align 4
+ %cmp8 = icmp sgt i32 %4, 0
+ br i1 %cmp8, label %if.then, label %if.end
+
+if.then:
+ %add = add i32 %3, %2
+ %add12 = add i32 %add, %4
+ %5 = ptrtoint i32* %0 to i32
+ %add15 = add nsw i32 %1, %5
+ %6 = inttoptr i32 %add15 to i32*
+ %arrayidx = getelementptr inbounds i32* %6, i32 %add12
+ %7 = load i32* %arrayidx, align 4
+ br label %if.end
+
+if.end:
+ %sum.0 = phi i32 [ %7, %if.then ], [ 0, %land.lhs.true7 ], [ 0, %land.lhs.true4 ], [ 0, %land.lhs.true2 ], [ 0, %land.lhs.true ], [ 0, %entry ]
+ ret i32 %sum.0
+}
+
+declare void @getnode(%struct.node_t* sret, ...)
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
new file mode 100644
index 0000000..311b0b3
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl ([[reg:%[a-z]+]])
+; CHECK-NEXT: addl $4, [[reg]]
+
+; Test for the FixupLEAs pre-emit pass.
+; An LEA should NOT be substituted for the ADD instruction
+; that increments the array pointer if it is greater than 5 instructions
+; away from the memory reference that uses it.
+
+; Original C code: clang -m32 -S -O2
+;int test(int n, int * array, int * m, int * array2)
+;{
+; int i, j = 0;
+; int sum = 0;
+; for (i = 0, j = 0; i < n;) {
+; ++i;
+; *m += array2[j++];
+; sum += array[i];
+; }
+; return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %.pre = load i32* %m, align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
+ %sum.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+ %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
+ %inc1 = add nsw i32 %j.09, 1
+ %arrayidx = getelementptr inbounds i32* %array2, i32 %j.09
+ %1 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %1
+ store i32 %add, i32* %m, align 4
+ %arrayidx2 = getelementptr inbounds i32* %array, i32 %inc1
+ %2 = load i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %2, %sum.010
+ %exitcond = icmp eq i32 %inc1, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index 6566f56..b2aea90 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -17,7 +17,7 @@
; ATOM-NEXT: movsd A(,%rax,8)
; ATOM-NEXT: mulsd
; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
+; ATOM-NEXT: leaq 1(%rax), %rax
@A = external global [0 x double]
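
The updated CHECK line reflects the new pass running on the Atom RUN line:
the induction-variable increment is now emitted as an LEA. As an equivalence
note, incq updates every arithmetic flag except CF while leaq updates none,
so the substitution is only safe where those flags are dead:

    incq  %rax              # old: ALU increment, result at EX stage
    leaq  1(%rax), %rax     # new: same value, computed at the AG stage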