author     Shih-wei Liao <sliao@google.com>  2010-04-07 12:21:42 -0700
committer  Shih-wei Liao <sliao@google.com>  2010-04-07 12:21:42 -0700
commit     e4454320b3cfffe926a487c33fbeb454366de2f8 (patch)
tree       133c05da684edf4a3b2529bcacfa996298c455f6 /lib/Target/X86
parent     20570085304f0a4ab4f112a01d77958bbd2827a1 (diff)
libbcc
Change-Id: Ieaa3ebd5a38f370752495549f8870b534eeedfc5
Diffstat (limited to 'lib/Target/X86')
43 files changed, 1759 insertions, 2033 deletions
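Before the file-by-file diff: a minimal standalone sketch (not part of the commit) of the llvm::StringSwitch idiom that the X86AsmParser change below adopts for mnemonic aliasing. The alias table mirrors the patch; the patchMnemonic wrapper and main() driver are illustrative assumptions only.

// Sketch of the StringSwitch mnemonic-patching pattern used in the
// X86AsmParser hunk below; hypothetical driver, not the commit's code.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <cstdio>

static llvm::StringRef patchMnemonic(llvm::StringRef Name) {
  // First matching .Case wins; .Default passes unknown names through.
  return llvm::StringSwitch<llvm::StringRef>(Name)
      .Case("sal",  "shl")
      .Case("salb", "shlb")
      .Case("sall", "shll")
      .Case("salq", "shlq")
      .Case("salw", "shlw")
      .Case("repe", "rep")
      .Case("repz", "rep")
      .Case("repnz", "repne")
      .Default(Name);
}

int main() {
  std::printf("%s\n", patchMnemonic("sall").str().c_str()); // prints "shll"
  std::printf("%s\n", patchMnemonic("mov").str().c_str());  // prints "mov"
}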
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
new file mode 100644
index 0000000..f5b8180
--- /dev/null
+++ b/lib/Target/X86/Android.mk
@@ -0,0 +1,47 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the host only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+  X86GenRegisterInfo.h.inc \
+  X86GenRegisterNames.inc \
+  X86GenRegisterInfo.inc \
+  X86GenInstrNames.inc \
+  X86GenInstrInfo.inc \
+  X86GenAsmMatcher.inc \
+  X86GenDAGISel.inc \
+  X86GenDisassemblerTables.inc \
+  X86GenFastISel.inc \
+  X86GenCallingConv.inc \
+  X86GenSubtarget.inc \
+  X86GenEDInfo.inc
+
+LOCAL_SRC_FILES := \
+  X86AsmBackend.cpp \
+  X86COFFMachineModuleInfo.cpp \
+  X86CodeEmitter.cpp \
+  X86ELFWriterInfo.cpp \
+  X86FastISel.cpp \
+  X86FloatingPoint.cpp \
+  X86FloatingPointRegKill.cpp \
+  X86ISelDAGToDAG.cpp \
+  X86ISelLowering.cpp \
+  X86InstrInfo.cpp \
+  X86JITInfo.cpp \
+  X86MCAsmInfo.cpp \
+  X86MCCodeEmitter.cpp \
+  X86MCTargetExpr.cpp \
+  X86RegisterInfo.cpp \
+  X86Subtarget.cpp \
+  X86TargetMachine.cpp \
+  X86TargetObjectFile.cpp
+
+LOCAL_MODULE:= libLLVMX86CodeGen
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index acf497a..84d7bb7 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Target/TargetAsmParser.h"
 #include "X86.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
@@ -183,6 +184,14 @@ struct X86Operand : public MCParsedAsmOperand {
 
   bool isReg() const { return Kind == Register; }
 
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediates when possible.
+    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
   void addRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateReg(getReg()));
@@ -190,13 +199,13 @@ struct X86Operand : public MCParsedAsmOperand {
 
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::CreateExpr(getImm()));
+    addExpr(Inst, getImm());
   }
 
   void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
     // FIXME: Support user customization of the render method.
     assert(N == 1 && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::CreateExpr(getImm()));
+    addExpr(Inst, getImm());
   }
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -204,7 +213,7 @@ struct X86Operand : public MCParsedAsmOperand {
     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
-    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+    addExpr(Inst, getMemDisp());
     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
   }
 
@@ -218,7 +227,7 @@ struct X86Operand : public MCParsedAsmOperand {
     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
-    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+    addExpr(Inst, getMemDisp());
   }
 
   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
@@ -492,24 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() {
 bool X86ATTAsmParser::
 ParseInstruction(const StringRef &Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  // FIXME: Hack to recognize "sal..." for now. We need a way to represent
-  // alternative syntaxes in the .td file, without requiring instruction
-  // duplication.
-  if (Name.startswith("sal")) {
-    std::string Tmp = "shl" + Name.substr(3).str();
-    Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc));
-  } else {
-    // FIXME: This is a hack. We eventually want to add a general pattern
-    // mechanism to be used in the table gen file for these assembly names that
-    // use the same opcodes. Also we should only allow the "alternate names"
-    // for rep and repne with the instructions they can only appear with.
-    StringRef PatchedName = Name;
-    if (Name == "repe" || Name == "repz")
-      PatchedName = "rep";
-    else if (Name == "repnz")
-      PatchedName = "repne";
-    Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
-  }
+  // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
+  // represent alternative syntaxes in the .td file, without requiring
+  // instruction duplication.
+  StringRef PatchedName = StringSwitch<StringRef>(Name)
+    .Case("sal", "shl")
+    .Case("salb", "shlb")
+    .Case("sall", "shll")
+    .Case("salq", "shlq")
+    .Case("salw", "shlw")
+    .Case("repe", "rep")
+    .Case("repz", "rep")
+    .Case("repnz", "repne")
+    .Default(Name);
+  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 38ccbf9..734a545 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -25,10 +25,15 @@ using namespace llvm;
 
 // Include the auto-generated portion of the assembly writer.
 #define MachineInstr MCInst
+#define GET_INSTRUCTION_NAME
 #include "X86GenAsmWriter.inc"
 #undef MachineInstr
 
 void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
 void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
   switch (MI->getOperand(Op).getImm()) {
@@ -68,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) {
     O << '$' << Op.getImm();
     
     if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
-      *CommentStream << format("imm = 0x%X\n", Op.getImm());
+      *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
   } else {
     assert(Op.isExpr() && "unknown operand kind in printOperand");
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 3180618..d109a07 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -26,11 +26,12 @@ public:
 
   virtual void printInst(const MCInst *MI);
-
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI);
   static const char *getRegisterName(unsigned RegNo);
-
+  static const char *getInstructionName(unsigned Opcode);
 
   void printOperand(const MCInst *MI, unsigned OpNo);
   void printMemReference(const MCInst *MI, unsigned Op);
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 304306d..8cab24c 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -33,10 +33,11 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/SmallString.h"
@@ -52,40 +53,42 @@ void X86AsmPrinter::PrintPICBaseSymbol() const {
                 OutContext);
 }
 
+MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
+  SmallString<60> NameStr;
+  Mang->getNameWithPrefix(NameStr, GV, false);
+  MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str());
+
+  if (Subtarget->isTargetCygMing()) {
+    X86COFFMachineModuleInfo &COFFMMI =
+      MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+    COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData());
+
+    // Save function name for later type emission.
+    if (const Function *F = dyn_cast<Function>(GV))
+      if (F->isDeclaration())
+        COFFMMI.addExternalFunction(Symb->getName());
+
+  }
+
+  return Symb;
+}
+
 /// runOnMachineFunction - Emit the function body.
 ///
 bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SetupMachineFunction(MF);
-
-  // COFF and Cygwin specific mangling stuff.  This should be moved out to the
-  // mangler or handled some other way?
-  if (Subtarget->isTargetCOFF()) {
-    X86COFFMachineModuleInfo &COFFMMI =
-      MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-
-    // Populate function information map.  Don't want to populate
-    // non-stdcall or non-fastcall functions' information right now.
-    const Function *F = MF.getFunction();
-    CallingConv::ID CC = F->getCallingConv();
-    if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
-      COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>());
-  }
-
-  if (Subtarget->isTargetCygMing()) {
+  if (Subtarget->isTargetCOFF()) {
     const Function *F = MF.getFunction();
-    X86COFFMachineModuleInfo &COFFMMI =
-      MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-    COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext,F,*TM.getTargetData());
-
-    O << "\t.def\t " << *CurrentFnSym;
-    O << ";\t.scl\t" <<
+    O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" <<
       (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
       << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
       << ";\t.endef\n";
   }
-
+
   // Have common code print out the function header with linkage info etc.
   EmitFunctionHeader();
-
+
   // Emit the rest of the function body.
   EmitFunctionBody();
@@ -119,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
     else
       GVSym = GetGlobalValueSymbol(GV);
-
-    if (Subtarget->isTargetCygMing()) {
-      X86COFFMachineModuleInfo &COFFMMI =
-        MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-      COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData());
-    }
-
     // Handle dllimport linkage.
     if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
       GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
@@ -585,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
       for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
         if (I->hasDLLExportLinkage()) {
           MCSymbol *Sym = GetGlobalValueSymbol(I);
-          COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData());
           DLLExportedFns.push_back(Sym);
         }
@@ -607,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
       }
     }
   }
+
+  if (Subtarget->isTargetELF()) {
+    TargetLoweringObjectFileELF &TLOFELF =
+      static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+    MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+    // Output stubs for external and common global variables.
+    MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+    if (!Stubs.empty()) {
+      OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+      const TargetData *TD = TM.getTargetData();
+
+      for (unsigned i = 0, e = Stubs.size(); i != e; ++i)
+        O << *Stubs[i].first << ":\n"
+          << (TD->getPointerSize() == 8 ?
+              MAI->getData64bitsDirective() : MAI->getData32bitsDirective())
+          << *Stubs[i].second << '\n';
+
+      Stubs.clear();
+    }
+  }
 }
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 1d32a5f..039214a 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -61,8 +61,7 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
   virtual void EmitInstruction(const MachineInstr *MI);
 
   void printSymbolOperand(const MachineOperand &MO);
-
-
+  virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const;
 
   // These methods are used by the tablegen'erated instruction printer.
   void printOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index 4274d0a..610beb5 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -24,10 +24,14 @@ using namespace llvm;
 
 // Include the auto-generated portion of the assembly writer.
 #define MachineInstr MCInst
+#define GET_INSTRUCTION_NAME
 #include "X86GenAsmWriter1.inc"
 #undef MachineInstr
 
 void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
 
 void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
   switch (MI->getOperand(Op).getImm()) {
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index 1976177..545bf84 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -26,10 +26,12 @@ public:
     : MCInstPrinter(O, MAI) {}
 
   virtual void printInst(const MCInst *MI);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
 
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI);
   static const char *getRegisterName(unsigned RegNo);
+  static const char *getInstructionName(unsigned Opcode);
 
   void printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 61f26a7..eed3b45 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
 tablegen(X86GenSubtarget.inc -gen-subtarget)
 
 set(sources
+  X86AsmBackend.cpp
   X86CodeEmitter.cpp
   X86COFFMachineModuleInfo.cpp
   X86ELFWriterInfo.cpp
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 19eb05e..e5f84e8 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -67,8 +67,8 @@ no_exit.i7:          ; preds = %no_exit.i7, %build_tree.exit
       [ %tmp.34.i18, %no_exit.i7 ]
   %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
       [ %tmp.28.i16, %no_exit.i7 ]
-  %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
-  %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+  %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00
+  %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00
   br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
 
 Compute_Tree.exit23:          ; preds = %no_exit.i7
@@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch:
 
 double %X(double %Y, double %Z, double %A, double %B) {
   %C = setlt double %A, %B
-  %z = add double %Z, 0.0    ;; select operand is not a load
+  %z = fadd double %Z, 0.0    ;; select operand is not a load
   %D = select bool %C, double %Y, double %z
   ret double %D
 }
@@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider:
 
 define i64 @ccosf(float %z.0, float %z.1) nounwind readonly {
 entry:
- %tmp6 = sub float -0.000000e+00, %z.1   ; <float> [#uses=1]
+ %tmp6 = fsub float -0.000000e+00, %z.1   ; <float> [#uses=1]
 %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
 ret i64 %tmp20
 }
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index aa7bb3d..d4545a6 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
 
 //===---------------------------------------------------------------------===//
 
-Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
-FR64 to VR128.
-
-//===---------------------------------------------------------------------===//
-
 Adding to the list of cmp / test poor codegen issues:
 
 int test(__m128 *A, __m128 *B) {
@@ -1868,3 +1863,69 @@ carried over to machine instructions.
 Asm printer (or JIT) can use this information to add the "lock" prefix.
 
 //===---------------------------------------------------------------------===//
+
+_Bool bar(int *x) { return *x & 1; }
+
+define zeroext i1 @bar(i32* nocapture %x) nounwind readonly {
+entry:
+  %tmp1 = load i32* %x                            ; <i32> [#uses=1]
+  %and = and i32 %tmp1, 1                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
+  ret i1 %tobool
+}
+
+bar:                                                        # @bar
+# BB#0:                                                     # %entry
+  movl  4(%esp), %eax
+  movb  (%eax), %al
+  andb  $1, %al
+  movzbl  %al, %eax
+  ret
+
+Missed optimization: should be movl+andl.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following two functions compiled with clang:
+_Bool foo(int *x) { return !(*x & 4); }
+unsigned bar(int *x) { return !(*x & 4); }
+
+foo:
+  movl  4(%esp), %eax
+  testb $4, (%eax)
+  sete  %al
+  movzbl  %al, %eax
+  ret
+
+bar:
+  movl  4(%esp), %eax
+  movl  (%eax), %eax
+  shrl  $2, %eax
+  andl  $1, %eax
+  xorl  $1, %eax
+  ret
+
+The second function generates more code even though the two functions are
+are functionally identical.
+
+//===---------------------------------------------------------------------===//
+
+Take the following C code:
+int x(int y) { return (y & 63) << 14; }
+
+Code produced by gcc:
+  andl  $63, %edi
+  sall  $14, %edi
+  movl  %edi, %eax
+  ret
+
+Code produced by clang:
+  shll  $14, %edi
+  movl  %edi, %eax
+  andl  $1032192, %eax
+  ret
+
+The code produced by gcc is 3 bytes shorter.  This sort of construct often
+shows up with bitfields.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/Android.mk b/lib/Target/X86/TargetInfo/Android.mk
new file mode 100644
index 0000000..ee53f0d
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/Android.mk
@@ -0,0 +1,24 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the device only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+  X86GenRegisterNames.inc \
+  X86GenInstrNames.inc
+
+TBLGEN_TD_DIR := $(LOCAL_PATH)/..
+
+LOCAL_SRC_FILES := \
+  X86TargetInfo.cpp
+
+LOCAL_C_INCLUDES += \
+  $(LOCAL_PATH)/..
+
+LOCAL_MODULE:= libLLVMX86Info
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1d17a05..ba0ee6c 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -19,12 +19,15 @@
 
 namespace llvm {
 
-class X86TargetMachine;
 class FunctionPass;
-class MachineCodeEmitter;
-class MCCodeEmitter;
 class JITCodeEmitter;
+class MCAssembler;
+class MCCodeEmitter;
+class MCContext;
+class MachineCodeEmitter;
 class Target;
+class TargetAsmBackend;
+class X86TargetMachine;
 class formatted_raw_ostream;
 
 /// createX86ISelDag - This pass converts a legalized DAG into a
@@ -49,9 +52,13 @@ FunctionPass *createX87FPRegKillInserterPass();
 FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
                                           JITCodeEmitter &JCE);
 
-MCCodeEmitter *createHeinousX86MCCodeEmitter(const Target &, TargetMachine &TM);
-MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM);
-MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM);
+MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
+                                         MCContext &Ctx);
+MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
+                                         MCContext &Ctx);
+
+TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &);
 
 /// createX86EmitCodeToMemory - Returns a pass that converts a register
 /// allocated function into raw machine code in a dynamically
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
new file mode 100644
index 0000000..e6654ef
--- /dev/null
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -0,0 +1,34 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "X86.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+namespace {
+
+class X86AsmBackend : public TargetAsmBackend {
+public:
+  X86AsmBackend(const Target &T, MCAssembler &A)
+    : TargetAsmBackend(T) {}
+};
+
+}
+
+TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+                                               MCAssembler &A) {
+  return new X86AsmBackend(T, A);
+}
+
+TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+                                               MCAssembler &A) {
+  return new X86AsmBackend(T, A);
+}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index ea52795..ab67acb 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
 X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
 }
 
-void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F,
-                                               const X86MachineFunctionInfo &Val) {
-  FunctionInfoMap[F] = Val;
+void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) {
+  CygMingStubs.insert(Name);
 }
-
-
-static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
-                                                    const TargetData &TD) {
-  X86MachineFunctionInfo Info;
-  uint64_t Size = 0;
-
-  switch (F->getCallingConv()) {
-  case CallingConv::X86_StdCall:
-    Info.setDecorationStyle(StdCall);
-    break;
-  case CallingConv::X86_FastCall:
-    Info.setDecorationStyle(FastCall);
-    break;
-  default:
-    return Info;
-  }
-
-  unsigned argNum = 1;
-  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-       AI != AE; ++AI, ++argNum) {
-    const Type* Ty = AI->getType();
-
-    // 'Dereference' type in case of byval parameter attribute
-    if (F->paramHasAttr(argNum, Attribute::ByVal))
-      Ty = cast<PointerType>(Ty)->getElementType();
-
-    // Size should be aligned to DWORD boundary
-    Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
-  }
-
-  // We're not supporting tooooo huge arguments :)
-  Info.setBytesToPopOnReturn((unsigned int)Size);
-  return Info;
-}
-
-
-/// DecorateCygMingName - Query FunctionInfoMap and use this information for
-/// various name decorations for Cygwin and MingW.
+/// DecorateCygMingName - Apply various name decorations if the function uses
+/// stdcall or fastcall calling convention.
 void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
                                                    const GlobalValue *GV,
                                                    const TargetData &TD) {
   const Function *F = dyn_cast<Function>(GV);
   if (!F) return;
-
-  // Save function name for later type emission.
-  if (F->isDeclaration())
-    CygMingStubs.insert(StringRef(Name.data(), Name.size()));
-
+  // We don't want to decorate non-stdcall or non-fastcall functions right now
   CallingConv::ID CC = F->getCallingConv();
   if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
     return;
-
-  const X86MachineFunctionInfo *Info;
-
-  FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
-  if (info_item == FunctionInfoMap.end()) {
-    // Calculate apropriate function info and populate map
-    FunctionInfoMap[F] = calculateFunctionInfo(F, TD);
-    Info = &FunctionInfoMap[F];
-  } else {
-    Info = &info_item->second;
-  }
-
-  if (Info->getDecorationStyle() == None) return;
+
+  unsigned ArgWords = 0;
+  DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F);
+  if (item == FnArgWords.end()) {
+    // Calculate arguments sizes
+    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+         AI != AE; ++AI) {
+      const Type* Ty = AI->getType();
+
+      // 'Dereference' type in case of byval parameter attribute
+      if (AI->hasByValAttr())
+        Ty = cast<PointerType>(Ty)->getElementType();
+
+      // Size should be aligned to DWORD boundary
+      ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+    }
+
+    FnArgWords[F] = ArgWords;
+  } else
+    ArgWords = item->second;
+
   const FunctionType *FT = F->getFunctionType();
-
   // "Pure" variadic functions do not receive @0 suffix.
   if (!FT->isVarArg() || FT->getNumParams() == 0 ||
       (FT->getNumParams() == 1 && F->hasStructRetAttr()))
-    raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn();
-
-  if (Info->getDecorationStyle() == FastCall) {
+    raw_svector_ostream(Name) << '@' << ArgWords;
+
+  if (CC == CallingConv::X86_FastCall) {
     if (Name[0] == '_')
       Name[0] = '@';
     else
       Name.insert(Name.begin(), '@');
-  }
+  }
 }
 
 /// DecorateCygMingName - Query FunctionInfoMap and use this information for
@@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name,
                                                    const TargetData &TD) {
   SmallString<128> NameStr(Name->getName().begin(), Name->getName().end());
   DecorateCygMingName(NameStr, GV, TD);
-
+
   Name = Ctx.GetOrCreateSymbol(NameStr.str());
 }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 0e2009e..9de3dcd 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -21,44 +21,25 @@ namespace llvm {
 class X86MachineFunctionInfo;
 class TargetData;
-
+
 /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
 /// for X86 COFF targets.
 class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
   StringSet<> CygMingStubs;
-
-  // We have to propagate some information about MachineFunction to
-  // AsmPrinter. It's ok, when we're printing the function, since we have
-  // access to MachineFunction and can get the appropriate MachineFunctionInfo.
-  // Unfortunately, this is not possible when we're printing reference to
-  // Function (e.g. calling it and so on). Even more, there is no way to get the
-  // corresponding MachineFunctions: it can even be not created at all. That's
-  // why we should use additional structure, when we're collecting all necessary
-  // information.
-  //
-  // This structure is using e.g. for name decoration for stdcall & fastcall'ed
-  // function, since we have to use arguments' size for decoration.
-  typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
-  FMFInfoMap FunctionInfoMap;
-
+  DenseMap<const Function*, unsigned> FnArgWords;
 public:
   X86COFFMachineModuleInfo(const MachineModuleInfo &);
   ~X86COFFMachineModuleInfo();
-
-
+
   void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx,
                            const GlobalValue *GV, const TargetData &TD);
   void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
                            const TargetData &TD);
-
-  void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val);
-
+  void addExternalFunction(const StringRef& Name);
   typedef StringSet<>::const_iterator stub_iterator;
   stub_iterator stub_begin() const { return CygMingStubs.begin(); }
   stub_iterator stub_end() const { return CygMingStubs.end(); }
-
-
 };
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f0bceb1..8deadf6 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -387,10 +387,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
   // If no BaseReg, issue a RIP relative instruction only if the MCE can
   // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
   // 2-7) and absolute references.
+  unsigned BaseRegNo = -1U;
+  if (BaseReg != 0 && BaseReg != X86::RIP)
+    BaseRegNo = getX86RegNum(BaseReg);
+
   if (// The SIB byte must be used if there is an index register.
       IndexReg.getReg() == 0 &&
-      // The SIB byte must be used if the base is ESP/RSP.
-      BaseReg != X86::ESP && BaseReg != X86::RSP &&
+      // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+      // encode to an R/M value of 4, which indicates that a SIB byte is
+      // present.
+      BaseRegNo != N86::ESP &&
       // If there is no base register and we're in 64-bit mode, we need a SIB
       // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
       (!Is64BitMode || BaseReg != 0)) {
@@ -401,7 +407,6 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
     return;
   }
 
-  unsigned BaseRegNo = getX86RegNum(BaseReg);
   // If the base is not EBP/ESP and there is no displacement, use simple
   // indirect register encoding, this handles addresses like [EAX].  The
   // encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -757,27 +762,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
   case X86II::MRM4r: case X86II::MRM5r:
   case X86II::MRM6r: case X86II::MRM7r: {
     MCE.emitByte(BaseOpcode);
-
-    // Special handling of lfence, mfence, monitor, and mwait.
-    if (Desc->getOpcode() == X86::LFENCE ||
-        Desc->getOpcode() == X86::MFENCE ||
-        Desc->getOpcode() == X86::MONITOR ||
-        Desc->getOpcode() == X86::MWAIT) {
-      emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
-
-      switch (Desc->getOpcode()) {
-      default: break;
-      case X86::MONITOR:
-        MCE.emitByte(0xC8);
-        break;
-      case X86::MWAIT:
-        MCE.emitByte(0xC9);
-        break;
-      }
-    } else {
-      emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
-                       (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
-    }
+    emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+                     (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
 
     if (CurOp == NumOps)
       break;
@@ -853,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
                      getX86RegNum(MI.getOperand(CurOp).getReg()));
     ++CurOp;
     break;
+
+  case X86II::MRM_C1:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xC1);
+    break;
+  case X86II::MRM_C8:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xC8);
+    break;
+  case X86II::MRM_C9:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xC9);
+    break;
+  case X86II::MRM_E8:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xE8);
+    break;
+  case X86II::MRM_F0:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xF0);
+    break;
   }
 
   if (!Desc->isVariadic() && CurOp != NumOps) {
@@ -864,335 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
 
   MCE.processDebugLoc(MI.getDebugLoc(), false);
 }
-
-// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter.
-//
-// FIXME: This is a total hack designed to allow work on llvm-mc to proceed
-// without being blocked on various cleanups needed to support a clean interface
-// to instruction encoding.
-//
-// Look away!
-
-#include "llvm/DerivedTypes.h"
-
-namespace {
-class MCSingleInstructionCodeEmitter : public MachineCodeEmitter {
-  uint8_t Data[256];
-  const MCInst *CurrentInst;
-  SmallVectorImpl<MCFixup> *FixupList;
-
-public:
-  MCSingleInstructionCodeEmitter() { reset(0, 0); }
-
-  void reset(const MCInst *Inst, SmallVectorImpl<MCFixup> *Fixups) {
-    CurrentInst = Inst;
-    FixupList = Fixups;
-    BufferBegin = Data;
-    BufferEnd = array_endof(Data);
-    CurBufferPtr = Data;
-  }
-
-  StringRef str() {
-    return StringRef(reinterpret_cast<char*>(BufferBegin),
-                     CurBufferPtr - BufferBegin);
-  }
-
-  virtual void startFunction(MachineFunction &F) {}
-  virtual bool finishFunction(MachineFunction &F) { return false; }
-  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {}
-  virtual bool earlyResolveAddresses() const { return false; }
-  virtual void addRelocation(const MachineRelocation &MR) {
-    unsigned Offset = 0, OpIndex = 0, Kind = MR.getRelocationType();
-
-    // This form is only used in one case, for branches.
-    if (MR.isBasicBlock()) {
-      Offset = unsigned(MR.getMachineCodeOffset());
-      OpIndex = 0;
-    } else {
-      assert(MR.isJumpTableIndex() && "Unexpected relocation!");
-
-      Offset = unsigned(MR.getMachineCodeOffset());
-
-      // The operand index is encoded as the first byte of the fake operand.
-      OpIndex = MR.getJumpTableIndex();
-    }
-
-    MCOperand Op = CurrentInst->getOperand(OpIndex);
-    assert(Op.isExpr() && "FIXME: Not yet implemented!");
-    FixupList->push_back(MCFixup::Create(Offset, Op.getExpr(),
-                                 MCFixupKind(FirstTargetFixupKind + Kind)));
-  }
-  virtual void setModuleInfo(MachineModuleInfo* Info) {}
-
-  // Interface functions which should never get called in our usage.
-
-  virtual void emitLabel(uint64_t LabelID) {
-    assert(0 && "Unexpected code emitter call!");
-  }
-  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
-    assert(0 && "Unexpected code emitter call!");
-    return 0;
-  }
-  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
-    assert(0 && "Unexpected code emitter call!");
-    return 0;
-  }
-  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
-    assert(0 && "Unexpected code emitter call!");
-    return 0;
-  }
-  virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
-    assert(0 && "Unexpected code emitter call!");
-    return 0;
-  }
-};
-
-class X86MCCodeEmitter : public MCCodeEmitter {
-  X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
-  void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
-
-private:
-  X86TargetMachine &TM;
-  llvm::Function *DummyF;
-  TargetData *DummyTD;
-  mutable llvm::MachineFunction *DummyMF;
-  llvm::MachineBasicBlock *DummyMBB;
-
-  MCSingleInstructionCodeEmitter *InstrEmitter;
-  Emitter<MachineCodeEmitter> *Emit;
-
-public:
-  X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) {
-    // Verily, thou shouldst avert thine eyes.
-    const llvm::FunctionType *FTy =
-      FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false);
-    DummyF = Function::Create(FTy, GlobalValue::InternalLinkage);
-    DummyTD = new TargetData("");
-    DummyMF = new MachineFunction(DummyF, TM, 0);
-    DummyMBB = DummyMF->CreateMachineBasicBlock();
-
-    InstrEmitter = new MCSingleInstructionCodeEmitter();
-    Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter,
-                                           *TM.getInstrInfo(),
-                                           *DummyTD, false);
-  }
-  ~X86MCCodeEmitter() {
-    delete Emit;
-    delete InstrEmitter;
-    delete DummyMF;
-    delete DummyF;
-  }
-
-  unsigned getNumFixupKinds() const {
-    return 5;
-  }
-
-  MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
-    static MCFixupKindInfo Infos[] = {
-      { "reloc_pcrel_word", 0, 4 * 8 },
-      { "reloc_picrel_word", 0, 4 * 8 },
-      { "reloc_absolute_word", 0, 4 * 8 },
-      { "reloc_absolute_word_sext", 0, 4 * 8 },
-      { "reloc_absolute_dword", 0, 8 * 8 }
-    };
-
-    assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind &&
-           "Invalid kind!");
-    return Infos[Kind - FirstTargetFixupKind];
-  }
-
-  bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr,
-                     unsigned Start) const {
-    if (Start + 1 > MI.getNumOperands())
-      return false;
-
-    const MCOperand &Op = MI.getOperand(Start);
-    if (!Op.isReg()) return false;
-
-    Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false));
-    return true;
-  }
-
-  bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr,
-                     unsigned Start) const {
-    if (Start + 1 > MI.getNumOperands())
-      return false;
-
-    const MCOperand &Op = MI.getOperand(Start);
-    if (Op.isImm()) {
-      Instr->addOperand(MachineOperand::CreateImm(Op.getImm()));
-      return true;
-    }
-    if (!Op.isExpr())
-      return false;
-
-    const MCExpr *Expr = Op.getExpr();
-    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
-      Instr->addOperand(MachineOperand::CreateImm(CE->getValue()));
-      return true;
-    }
-
-    // Fake this as an external symbol to the code emitter to add a relcoation
-    // entry we will recognize.
-    Instr->addOperand(MachineOperand::CreateJTI(Start, 0));
-    return true;
-  }
-
-  bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr,
-                      unsigned Start) const {
-    return (AddRegToInstr(MI, Instr, Start + 0) &&
-            AddImmToInstr(MI, Instr, Start + 1) &&
-            AddRegToInstr(MI, Instr, Start + 2) &&
-            AddImmToInstr(MI, Instr, Start + 3));
-  }
-
-  bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr,
-                     unsigned Start) const {
-    return (AddRegToInstr(MI, Instr, Start + 0) &&
-            AddImmToInstr(MI, Instr, Start + 1) &&
-            AddRegToInstr(MI, Instr, Start + 2) &&
-            AddImmToInstr(MI, Instr, Start + 3) &&
-            AddRegToInstr(MI, Instr, Start + 4));
-  }
-
-  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-                         SmallVectorImpl<MCFixup> &Fixups) const {
-    // Don't look yet!
-
-    // Convert the MCInst to a MachineInstr so we can (ab)use the regular
-    // emitter.
-    const X86InstrInfo &II = *TM.getInstrInfo();
-    const TargetInstrDesc &Desc = II.get(MI.getOpcode());
-    MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc());
-    DummyMBB->push_back(Instr);
-
-    unsigned Opcode = MI.getOpcode();
-    unsigned NumOps = MI.getNumOperands();
-    unsigned CurOp = 0;
-    bool AddTied = false;
-    if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
-      AddTied = true;
-    else if (NumOps > 2 &&
-             Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
-      // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
-      --NumOps;
-
-    bool OK = true;
-    switch (Desc.TSFlags & X86II::FormMask) {
-    case X86II::MRMDestReg:
-    case X86II::MRMSrcReg:
-      // Matching doesn't fill this in completely, we have to choose operand 0
-      // for a tied register.
-      OK &= AddRegToInstr(MI, Instr, CurOp++);
-      if (AddTied)
-        OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
-      OK &= AddRegToInstr(MI, Instr, CurOp++);
-      if (CurOp < NumOps)
-        OK &= AddImmToInstr(MI, Instr, CurOp);
-      break;
-
-    case X86II::RawFrm:
-      if (CurOp < NumOps) {
-        // Hack to make branches work.
-        if (!(Desc.TSFlags & X86II::ImmMask) &&
-            MI.getOperand(0).isExpr() &&
-            isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr()))
-          Instr->addOperand(MachineOperand::CreateMBB(DummyMBB));
-        else
-          OK &= AddImmToInstr(MI, Instr, CurOp);
-      }
-      break;
-
-    case X86II::AddRegFrm:
-      // Matching doesn't fill this in completely, we have to choose operand 0
-      // for a tied register.
-      OK &= AddRegToInstr(MI, Instr, CurOp++);
-      if (AddTied)
-        OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
-      if (CurOp < NumOps)
-        OK &= AddImmToInstr(MI, Instr, CurOp);
-      break;
-
-    case X86II::MRM0r: case X86II::MRM1r:
-    case X86II::MRM2r: case X86II::MRM3r:
-    case X86II::MRM4r: case X86II::MRM5r:
-    case X86II::MRM6r: case X86II::MRM7r:
-      // Matching doesn't fill this in completely, we have to choose operand 0
-      // for a tied register.
-      OK &= AddRegToInstr(MI, Instr, CurOp++);
-      if (AddTied)
-        OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
-      if (CurOp < NumOps)
-        OK &= AddImmToInstr(MI, Instr, CurOp);
-      break;
-
-    case X86II::MRM0m: case X86II::MRM1m:
-    case X86II::MRM2m: case X86II::MRM3m:
-    case X86II::MRM4m: case X86II::MRM5m:
-    case X86II::MRM6m: case X86II::MRM7m:
-      OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
-      if (CurOp < NumOps)
-        OK &= AddImmToInstr(MI, Instr, CurOp);
-      break;
-
-    case X86II::MRMSrcMem:
-      // Matching doesn't fill this in completely, we have to choose operand 0
-      // for a tied register.
-      OK &= AddRegToInstr(MI, Instr, CurOp++);
-      if (AddTied)
-        OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
-      if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
-          Opcode == X86::LEA16r || Opcode == X86::LEA32r)
-        OK &= AddLMemToInstr(MI, Instr, CurOp);
-      else
-        OK &= AddMemToInstr(MI, Instr, CurOp);
-      break;
-
-    case X86II::MRMDestMem:
-      OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
-      OK &= AddRegToInstr(MI, Instr, CurOp);
-      break;
-
-    default:
-    case X86II::MRMInitReg:
-    case X86II::Pseudo:
-      OK = false;
-      break;
-    }
-
-    if (!OK) {
-      dbgs() << "couldn't convert inst '";
-      MI.dump();
-      dbgs() << "' to machine instr:\n";
-      Instr->dump();
-    }
-
-    InstrEmitter->reset(&MI, &Fixups);
-    if (OK)
-      Emit->emitInstruction(*Instr, &Desc);
-    OS << InstrEmitter->str();
-
-    Instr->eraseFromParent();
-  }
-};
-}
-
-#include "llvm/Support/CommandLine.h"
-
-static cl::opt<bool> EnableNewEncoder("enable-new-x86-encoder",
-                                      cl::ReallyHidden);
-
-
-// Ok, now you can look.
-MCCodeEmitter *llvm::createHeinousX86MCCodeEmitter(const Target &T,
-                                                   TargetMachine &TM) {
-
-  // FIXME: Remove the heinous one when the new one works.
-  if (EnableNewEncoder) {
-    if (TM.getTargetData()->getPointerSize() == 4)
-      return createX86_32MCCodeEmitter(T, TM);
-    return createX86_64MCCodeEmitter(T, TM);
-  }
-
-  return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM));
-}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ea398e9..98e3f4e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
   }
 
   case Instruction::GetElementPtr: {
+    X86AddressMode SavedAM = AM;
+
     // Pattern-match simple GEPs.
     uint64_t Disp = (int32_t)AM.Disp;
     unsigned IndexReg = AM.IndexReg;
@@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
     AM.IndexReg = IndexReg;
     AM.Scale = Scale;
     AM.Disp = (uint32_t)Disp;
-    return X86SelectAddress(U->getOperand(0), AM);
+    if (X86SelectAddress(U->getOperand(0), AM))
+      return true;
+
+    // If we couldn't merge the sub value into this addr mode, revert back to
+    // our address and just match the value instead of completely failing.
+    AM = SavedAM;
+    break;
   unsupported_gep:
     // Ok, the GEP indices weren't all covered.
     break;
@@ -786,8 +794,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
 
 bool X86FastISel::X86SelectZExt(Instruction *I) {
   // Handle zero-extension from i1 to i8, which is common.
-  if (I->getType()->isInteger(8) &&
-      I->getOperand(0)->getType()->isInteger(1)) {
+  if (I->getType()->isIntegerTy(8) &&
+      I->getOperand(0)->getType()->isIntegerTy(1)) {
     unsigned ResultReg = getRegForValue(I->getOperand(0));
     if (ResultReg == 0) return false;
     // Set the high bits to zero.
@@ -828,30 +836,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
       std::swap(TrueMBB, FalseMBB);
       Predicate = CmpInst::FCMP_UNE;
       // FALL THROUGH
-    case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE;  break;
-    case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA;   break;
-    case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE;  break;
-    case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA;   break;
-    case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE;  break;
-    case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE;  break;
-    case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP;  break;
-    case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP;   break;
-    case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE;   break;
-    case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB;   break;
-    case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE;  break;
-    case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB;   break;
-    case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE;  break;
+    case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4;  break;
+    case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;   break;
+    case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4;  break;
+    case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;   break;
+    case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4;  break;
+    case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4;  break;
+    case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4;  break;
+    case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;   break;
+    case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;   break;
+    case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;   break;
+    case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4;  break;
+    case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;   break;
+    case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4;  break;
 
-    case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE;  break;
-    case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE; break;
-    case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA;  break;
-    case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
-    case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
-    case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
-    case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG;  break;
-    case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
-    case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL;  break;
-    case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
+    case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
+    case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
+    case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
+    case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+    case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
+    case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+    case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
+    case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+    case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
+    case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
     default:
       return false;
     }
@@ -869,7 +877,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
     if (Predicate == CmpInst::FCMP_UNE) {
       // X86 requires a second branch to handle UNE (and OEQ,
       // which is mapped to UNE above).
-      BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
+      BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
     }
 
     FastEmitBranch(FalseMBB);
@@ -923,7 +931,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
         unsigned OpCode = SetMI->getOpcode();
 
         if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
-          BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
+          BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
+                                   X86::JO_4 : X86::JB_4))
             .addMBB(TrueMBB);
           FastEmitBranch(FalseMBB);
           MBB->addSuccessor(TrueMBB);
@@ -939,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
   if (OpReg == 0) return false;
 
   BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
-  BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
+  BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
   FastEmitBranch(FalseMBB);
   MBB->addSuccessor(TrueMBB);
   return true;
@@ -948,7 +957,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
 bool X86FastISel::X86SelectShift(Instruction *I) {
   unsigned CReg = 0, OpReg = 0, OpImm = 0;
   const TargetRegisterClass *RC = NULL;
-  if (I->getType()->isInteger(8)) {
+  if (I->getType()->isIntegerTy(8)) {
     CReg = X86::CL;
     RC = &X86::GR8RegClass;
     switch (I->getOpcode()) {
@@ -957,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
     case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
     default: return false;
     }
-  } else if (I->getType()->isInteger(16)) {
+  } else if (I->getType()->isIntegerTy(16)) {
     CReg = X86::CX;
     RC = &X86::GR16RegClass;
     switch (I->getOpcode()) {
@@ -966,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
     case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
     default: return false;
     }
-  } else if (I->getType()->isInteger(32)) {
+  } else if (I->getType()->isIntegerTy(32)) {
     CReg = X86::ECX;
     RC = &X86::GR32RegClass;
     switch (I->getOpcode()) {
@@ -975,7 +984,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
     case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
     default: return false;
     }
-  } else if (I->getType()->isInteger(64)) {
+  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
@@ -1160,6 +1169,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
     if (!X86SelectAddress(DI->getAddress(), AM))
       return false;
     const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    // FIXME may need to add RegState::Debug to any registers produced,
+    // although ESP/EBP should be the only ones at the moment.
     addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
                                         addMetadata(DI->getVariable());
     return true;
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
new file mode 100644
index 0000000..c8dac3c
--- /dev/null
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -0,0 +1,25 @@
+//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+  reloc_pcrel_4byte = FirstTargetFixupKind,  // 32-bit pcrel, e.g. a branch.
+  reloc_pcrel_1byte,                         // 8-bit pcrel, e.g. branch_1
+  reloc_riprel_4byte                         // 32-bit rip-relative
+};
+}
+}
+
+#endif
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 34a0045..6a117dd 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
       for (BasicBlock::const_iterator II = SI->begin();
            (PN = dyn_cast<PHINode>(II)); ++II) {
         if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
-            (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
+            (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
             (!Subtarget.hasSSE2() &&
              PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
           ContainsFPCode = true;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index e44ce421..3fad8ad 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,15 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Force NDEBUG on in any optimized build on Darwin.
-//
-// FIXME: This is a huge hack, to work around ridiculously awful compile times
-// on this file with gcc-4.2 on Darwin, in Release mode.
-#if (!defined(__llvm__) && defined(__APPLE__) && \
-     defined(__OPTIMIZE__) && !defined(NDEBUG))
-#define NDEBUG
-#endif
-
 #define DEBUG_TYPE "x86-isel"
 #include "X86.h"
 #include "X86InstrBuilder.h"
@@ -177,14 +168,11 @@ namespace {
       return "X86 DAG->DAG Instruction Selection";
     }
 
-    /// InstructionSelect - This callback is invoked by
-    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-    virtual void InstructionSelect();
-
     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
 
-    virtual
-      bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+    virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+    virtual void PreprocessISelDAG();
 
     // Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -208,18 +196,17 @@ namespace {
                      SDValue &Scale, SDValue &Index, SDValue &Disp);
     bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp);
-    bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
-                             SDValue N, SDValue &Base, SDValue &Scale,
+    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+                             SDValue &Base, SDValue &Scale,
                              SDValue &Index, SDValue &Disp,
                              SDValue &Segment,
-                             SDValue &InChain, SDValue &OutChain);
+                             SDValue &NodeWithChain);
+
     bool TryFoldLoad(SDNode *P, SDValue N,
                      SDValue &Base, SDValue &Scale,
                      SDValue &Index, SDValue &Disp,
                      SDValue &Segment);
-    void PreprocessForRMW();
-    void PreprocessForFPConvert();
-
+
     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     /// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -295,19 +282,22 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,60 +344,9 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) { - if (!N.hasOneUse()) - return false; - N = N.getOperand(0); } - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; - } - return false; + return true; } /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain @@ -473,51 +412,15 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. 
We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -537,85 +440,23 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - checkForCycles(I); - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. 
We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -652,9 +493,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -670,30 +512,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. -void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -1300,22 +1118,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. 
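///
/// Illustration (an assumed canonical form, not spelled out in the original
/// comment): the first case below matches a scalar load placed into a
/// vector, e.g.
///   (v4f32 (scalar_to_vector (loadf32 addr:$src)))
/// so the load can fold into instructions such as MOVSS that only read the
/// low element; the second case below handles the explicitly zero-extended
/// form of the same pattern.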
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { - LoadSDNode *LD = cast<LoadSDNode>(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1327,13 +1147,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1407,7 +1228,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1434,11 +1254,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1541,7 +1362,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1549,7 +1370,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1564,7 +1385,7 @@ SDNode 
*X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1572,7 +1393,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1588,7 +1409,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1596,7 +1417,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1652,8 +1473,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1693,24 +1514,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1806,13 +1613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. 
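// Illustration (assuming the standard x86 widening multiply): for the
// 32-bit case the hardware computes EDX:EAX = EAX * src, so result 0 of
// the node was copied out of the low register above and result 1, when
// used, is copied out of the high register here. In C terms:
//   uint64_t wide = (uint64_t)a * (uint64_t)b;  // EDX:EAX
//   uint32_t lo = (uint32_t)wide;               // EAX
//   uint32_t hi = (uint32_t)(wide >> 32);       // EDX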
if (!SDValue(Node, 1).use_empty()) { @@ -1835,19 +1636,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1962,13 +1753,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1992,19 +1777,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2117,17 +1891,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 515bc84..802bedc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -73,15 +73,16 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { case X86Subtarget::isDarwin: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_MachoTargetObjectFile(); - return new X8632_MachoTargetObjectFile(); + return new TargetLoweringObjectFileMachO(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -1001,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. 
Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1411,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1476,7 +1452,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } } @@ -1498,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1573,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1668,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1737,7 +1712,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call @@ -1752,7 +1728,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1767,11 +1743,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } @@ -1882,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -2013,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2256,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2272,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -2397,7 +2382,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + false, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3592,7 +3577,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle. 
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4836,8 +4822,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc; + if (VT == MVT::v8i16) + Opc = X86ISD::PINSRW; + else if (VT == MVT::v4i16) + Opc = X86ISD::MMX_PINSRW; + else if (VT == MVT::v16i8) + Opc = X86ISD::PINSRB; + else + Opc = X86ISD::PINSRB; + // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4888,7 +4882,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); + return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, + dl, VT, N0, N1, N2); } return SDValue(); } @@ -5091,7 +5086,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5171,7 +5166,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -5196,7 +5191,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. 
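// Sketch of that computation (assuming the conventional x86 TLS layout, in
// which the thread pointer is readable at offset 0 of the %gs/%fs segment):
//   tp   = load seg:[0]                        // ThreadPointer above
//   off  = initial-exec ? load of a GOT slot
//                       : wrapped constant     // Offset above
//   addr = tp + off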
@@ -5264,7 +5259,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, + SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; @@ -5313,7 +5308,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5348,7 +5344,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5421,12 +5418,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5513,9 +5510,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5563,7 +5560,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5597,7 +5595,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5607,7 +5605,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5632,8 +5630,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5659,8 +5657,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5708,8 +5706,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5737,8 +5735,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5890,26 +5888,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. 
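//
// Illustration (not from the original comment): the rewritten matcher
// recognizes single-bit tests in either operand order, looking through
// TRUNCATE of the AND operands on the way, e.g.
//   (and x, (shl 1, n)) == 0   or   (and (srl x, n), 1) == 0
// both become BT x, n followed by a carry-flag branch or setcc.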
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5959,6 +5962,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. + if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) @@ -6400,24 +6418,13 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, - DAG.getTargetExternalSymbol("_alloca", IntPtr), - DAG.getRegister(X86::EAX, IntPtr), - DAG.getRegister(X86StackPtr, SPTy), - Flag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5); - Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), - Flag); + Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -6461,8 +6468,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -6646,7 +6652,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6658,8 +6665,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6667,21 +6674,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6967,13 +6976,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. 
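// Layout assumed by the depth > 0 path above (the usual frame-pointer
// chain): each frame saves the caller's FP at [FP] and its return address
// at [FP + PointerSize], hence the load from FrameAddr + Offset. Depth 0
// below just reads the fixed slot that getReturnAddressFrameIndex()
// reserves next to the frame pointer.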
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6985,7 +6994,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -7009,7 +7019,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7044,11 +7054,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -7056,24 +7067,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7133,21 +7145,23 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -7190,7 +7204,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7554,7 +7569,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7572,14 +7588,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7677,6 +7685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -7721,6 +7730,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } } @@ -7778,13 +7788,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -7794,12 +7804,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { unsigned NumBits2 = VT2.getSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7955,7 +7965,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8112,7 +8122,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 
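// Shape of the loop this inserter builds (a sketch, assuming the usual
// CMPXCHG8B expansion for 64-bit atomics on 32-bit x86):
//   retry:
//     EDX:EAX = current value read from [mem]
//     ECX:EBX = updated value computed from EDX:EAX
//     lock cmpxchg8b [mem]   // swaps only if [mem] still equals EDX:EAX
//     jne retry              // the JNE_4 emitted above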
return nextMBB; @@ -8215,7 +8225,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8297,7 +8307,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8390,6 +8400,29 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + // The lowering is pretty easy: we're just emitting the call to _alloca. The + // non-trivial part is impdef of ESP. + // FIXME: The code should be tweaked as soon as we'll try to do codegen for + // mingw-w64. + + BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca") + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, @@ -8397,6 +8430,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::MINGW_ALLOCA: + return EmitLoweredMingwAlloca(MI, BB, EM); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8783,10 +8818,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -8806,10 +8842,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(2); // If we have SSE[12] support, try to form min/max nodes. SSE min/max - // instructions have the peculiarity that if either operand is a NaN, - // they chose what we call the RHS operand (and as such are not symmetric). - // It happens that this matches the semantics of the common C idiom - // x<y?x:y and related forms, so we can recognize these cases. + // instructions match the semantics of the common C idiom x<y?x:y but not + // x<=y?x:y, because of how they handle negative zero (which can be + // ignored in unsafe-math mode). 
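// Concretely (an illustration, not part of the original comment): minss and
// maxss compute "a < b ? a : b" / "a > b ? a : b" and otherwise return the
// second operand, so the result is order-sensitive for NaNs and for signed
// zeros:
//   minss(NaN, x)    = x       minss(x, NaN)    = NaN
//   minss(-0.0, 0.0) = 0.0     minss(0.0, -0.0) = -0.0
// The cases below therefore consult isKnownNeverNaN / isKnownNeverZero
// before forming FMIN/FMAX or swapping the operands.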
if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { @@ -8817,36 +8852,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode = 0; // Check for x CC y ? x : y. - if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) + break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -8855,32 +8888,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETOGE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8889,36 +8919,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } // Check for x CC y ? y : x -- a min/max with reversed arms. - } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle NaNs incorrectly. + if (!UnsafeFPMath && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8927,32 +8954,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETULT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle NaNs incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -9177,10 +9200,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -9319,7 +9338,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9505,7 +9524,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9514,7 +9533,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9524,10 +9544,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9544,11 +9565,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9731,7 +9754,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
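// "Simple" here means a one-argument asm call whose integer result type
// matches its operand, so it can be replaced by a single llvm.bswap. The
// 16-bit rotate form recognized later in ExpandInlineAsm works because
// rotating a 16-bit value by 8 in either direction exchanges its two
// bytes; as a sketch:
//   uint16_t ror8(uint16_t v) { return (uint16_t)((v >> 8) | (v << 8)); }
//   // ror8(0x1234) == 0x3412 == byte-swapped 0x1234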
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9780,17 +9803,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 193ef05..4c12fcc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -180,7 +180,7 @@ namespace llvm { /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. - PINSRW, + PINSRW, MMX_PINSRW, /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, @@ -249,6 +249,9 @@ namespace llvm { // with control flow. VASTART_SAVE_XMM_REGS, + // MINGW_ALLOCA - MingW's __alloca call to do stack probing. + MINGW_ALLOCA, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -259,6 +262,10 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be + // thought as target memory ops! }; } @@ -639,7 +646,6 @@ namespace llvm { int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -790,7 +796,11 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; - + + MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 468dd67..8462255 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", // Pattern fragments. 
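//
// The immSext8 fragment used below replaces the per-width predicates with
// a single check that the immediate survives an 8-bit sign-extend round
// trip; in C++ terms (illustrative, mirroring the deleted predicate):
//   bool isSExt8(int64_t V) { return (int64_t)(int8_t)V == V; }
// i.e. values in [-128, 127] qualify and 128 does not.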
// -def i64immSExt8 : PatLeaf<(i64 imm), [{ - // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i64immSExt8 : PatLeaf<(i64 immSext8)>; + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); }]>; def i64immSExt32 : PatLeaf<(i64 imm), [{ @@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); }]>; + def i64immZExt32 : PatLeaf<(i64 imm), [{ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit // unsignedsign extended field. @@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; @@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; } // Uses = [EFLAGS] @@ -893,35 +895,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + 
"rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1771,7 +1776,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; @@ -1978,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), - imm:$imm), + (i32 (GetLo32XForm imm:$imm))), x86_subreg_32bit)>; // r & (2^32-1) ==> movz @@ -2102,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(shl GR64:$src1, (and CL, 63)), (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(srl GR64:$src1, (and CL, 63)), (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), +def : Pat<(sra GR64:$src1, (and CL, 63)), (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; // Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e22a903..ae24bfb 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; -let isReMaterializable = 1, mayHaveSideEffects = 1 in +let isReMaterializable = 1 in def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. 
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8d13c0f..39bda04 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, @@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, - { X86::MOVSDrr, X86::MOVSDrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, - { X86::MOVSSrr, X86::MOVSSrm, 0 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, @@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOVSSrr: - case X86::MOVSDrr: // FP Stack register class copies case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: case X86::MOV_Fp3264: case X86::MOV_Fp3280: case X86::MOV_Fp6432: case X86::MOV_Fp8032: - + + // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 + // copies are done with FsMOVAPSrr and FsMOVAPDrr. 
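    // Aside (editorial, not part of the patch): movss/movsd write only the
    // low lane and merge in the old upper lanes of the destination, so they
    // carry a false dependency on the previous destination value; the
    // full-register movaps/movapd forms (FsMOVAPSrr/FsMOVAPDrr) have no such
    // merge, which is why they are preferred for plain FR32/FR64 copies.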
+ case X86::FsMOVAPSrr: case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: case X86::MOVDQArr: - case X86::MOVSS2PSrr: - case X86::MOVSD2PDrr: - case X86::MOVPS2SSrr: - case X86::MOVPD2SDrr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && @@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; } Clone = false; } @@ -1587,44 +1577,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1694,7 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. 
- if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1778,7 +1768,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1804,7 +1794,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1814,16 +1804,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1834,7 +1824,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, CommonRC = SrcRC; else if (!DestRC->hasSubClass(SrcRC)) { // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy then as GR64. Similarly, for GR32_NOREX and + // but we want to copy them as GR64. Similarly, for GR32_NOREX and // GR32_NOSP, copy as GR32. if (SrcRC->hasSuperClass(&X86::GR64RegClass) && DestRC->hasSuperClass(&X86::GR64RegClass)) @@ -3556,6 +3546,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index a6b3863..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -268,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -331,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. 
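    // Aside (editorial, not part of the patch): with Imm8PCRel and Imm32PCRel
    // added, this field now carries seven distinct values (0 through 6),
    // which still fits comfortably in the three bits reserved below.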
ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -396,15 +410,37 @@ namespace X86II { return TSFlags >> X86II::OpcodeShift; } + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(unsigned TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; } } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f0b4239..8a6ff54 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; -def SDTX86RdTsc : SDTypeProfile<0, 0, []>; +def SDTX86Void : SDTypeProfile<0, 0, []>; def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -143,7 +143,7 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, SDNPMayLoad]>; -def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, +def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; @@ -178,6 +178,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; +def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -343,18 +346,37 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def i16immSExt8 : PatLeaf<(i16 imm), [{ - // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit - // sign extended field. - return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def immSext8 : PatLeaf<(imm), [{ + return N->getSExtValue() == (int8_t)N->getSExtValue(); }]>; -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. 
-  return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def i16immSExt8 : PatLeaf<(i16 immSext8)>;
+def i32immSExt8 : PatLeaf<(i32 immSext8)>;
+
+/// Load patterns: these constrain the match to the right address space.
+def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+      if (PT->getAddressSpace() > 255)
+        return false;
+  return true;
 }]>;
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+      return PT->getAddressSpace() == 256;
+  return false;
+}]>;
+
+def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+      return PT->getAddressSpace() == 257;
+  return false;
+}]>;
+
+
 // Helper fragments for loads.
 // It's always safe to treat an anyext i16 load as an i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
@@ -372,8 +394,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
   return false;
 }]>;
-def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),
-[{
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
   LoadSDNode *LD = cast<LoadSDNode>(N);
   if (const Value *Src = LD->getSrcValue())
     if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
@@ -399,72 +420,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
-def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
-  LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  if (LD->isVolatile())
-    return false;
-  ISD::LoadExtType ExtType = LD->getExtensionType();
-  if (ExtType == ISD::NON_EXTLOAD)
-    return true;
-  if (ExtType == ISD::EXTLOAD)
-    return LD->getAlignment() >= 4;
-  return false;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 256;
-  return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 257;
-  return false;
-}]>;
-
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
+def loadi8  : PatFrag<(ops node:$ptr), (i8  (dsload node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;

 def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -562,7 +522,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
 }

 // x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in
+let usesCustomInserter = 1 in {
 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                               (outs),
                               (ins GR8:$al,
@@ -573,6 +533,19 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                                                          imm:$regsavefi,
                                                          imm:$offset)]>;

+// Dynamic stack allocation yields an _alloca call for Cygwin/MinGW targets.
+// Calls to _alloca are needed to probe the stack when allocating more than 4k
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
+
+def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
+                     "# dynamic stack allocation",
+                     [(X86MingwAlloca)]>;
+}
+
 // Nop
 let neverHasSideEffects = 1 in {
   def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -596,7 +569,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
                              "", []>;

 //===----------------------------------------------------------------------===//
-//  Control Flow Instructions...
+//  Control Flow Instructions.
 //

 // Return instructions.
@@ -614,16 +587,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
                     "lret\t$amt", []>;
 }

-// All branches are RawFrm, Void, Branch, and Terminators
-let isBranch = 1, isTerminator = 1 in
-  class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
-        I<opcode, RawFrm, (outs), ins, asm, pattern>;
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+  def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+                        "jmp\t$dst", [(br bb:$dst)]>;
+  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+                       "jmp\t$dst", []>;
+}

-let isBranch = 1, isBarrier = 1 in {
-  def JMP  : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
-  def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>;
+// Conditional Branches.
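An aside before the branch definitions below: the MINGW_ALLOCA comment above describes the probing contract only in prose. Here is a hedged C++ sketch of what a probe loop does (probe_stack is a hypothetical illustration; the real _alloca helper is written in assembly and also adjusts the stack pointer itself):

  #include <cstddef>

  // Touch the region between the old and new stack pointer one 4 KiB page at
  // a time, so each OS guard page faults in order and the kernel grows the
  // stack mapping instead of faulting the process.
  void probe_stack(volatile char *sp, std::size_t bytes) {
    const std::size_t PageSize = 4096;
    for (std::size_t off = PageSize; off < bytes; off += PageSize)
      sp[-static_cast<std::ptrdiff_t>(off)] = 0;
    sp[-static_cast<std::ptrdiff_t>(bytes)] = 0;  // final, possibly partial page
  }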
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? +let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -644,63 +647,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, 
              EFLAGS)]>, TB;
-def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst",
-              [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB;
-def JA  : IBr<0x87, (ins brtarget:$dst), "ja\t$dst",
-              [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB;
-def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst",
-              [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB;
-
-def JS  : IBr<0x88, (ins brtarget:$dst), "js\t$dst",
-              [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB;
-def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst",
-              [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB;
-def JP  : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst",
-              [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB;
-def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst",
-              [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB;
-def JO  : IBr<0x80, (ins brtarget:$dst), "jo\t$dst",
-              [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB;
-def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
-              [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
-} // Uses = [EFLAGS]

 // Loop instructions
@@ -721,7 +667,7 @@ let isCall = 1 in
               XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
               XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
       Uses = [ESP] in {
-    def CALLpcrel32 : Ii32<0xE8, RawFrm,
+    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
                            (outs), (ins i32imm_pcrel:$dst,variable_ops),
                            "call\t$dst", []>;
     def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
@@ -761,8 +707,10 @@ def TCRETURNri : I<0, Pseudo, (outs),
                  "#TC_RETURN $dst $offset", []>;

-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops),
+// FIXME: These should be pseudo instructions that are lowered when going to
+// mcinst.
+let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
                  "jmp\t$dst # TAILCALL", []>;
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -929,6 +877,9 @@ let Defs = [RAX, RDX] in
 def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;

+let Defs = [RAX, RCX, RDX] in
+def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
 let isBarrier = 1, hasCtrlDep = 1 in {
 def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
 }
@@ -1059,7 +1010,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
 def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}", []>;

-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
 def MOV8rm  : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}",
                 [(set GR8:$dst, (loadi8 addr:$src))]>;
@@ -1093,7 +1044,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem,
                      (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
 let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+    canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
                      (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
@@ -1115,7 +1066,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
 //

 // Extra precision multiplication
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
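The comment above is easy to verify arithmetically: an 8-bit multiply reads AL but defines the whole of AX, which is why AX belongs in Defs. A small C++ check of the architectural semantics (illustrative, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // x86 "mul r/m8" computes AX = AL * src, with AH holding the high byte.
  int main() {
    std::uint8_t al = 200, src = 3;
    std::uint16_t ax = static_cast<std::uint16_t>(al) * src;  // 600 == 0x0258
    assert((ax >> 8) == 0x02 && (ax & 0xFF) == 0x58);
  }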
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1133,7 +1087,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1155,7 +1109,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1165,7 +1119,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1178,7 +1132,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1188,7 +1142,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1201,7 +1155,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. 
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1211,7 +1165,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -2328,98 +2282,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def 
RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -4100,7 +4056,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), 
(ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4262,17 +4218,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4294,7 +4250,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -4462,12 +4418,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; -// (and (i32 load), 255) -> (zextload i8) -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), - (MOVZX32rm8 addr:$src)>; -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))), - (MOVZX32rm16 addr:$src)>; - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// @@ -4563,43 +4513,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; // (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR8:$src1, (and CL, 31)), (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR16:$src1, (and CL, 31)), (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR32:$src1, (and CL, 31)), (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHL16mCL addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHL32mCL addr:$dst)>; -def : Pat<(srl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR8:$src1, (and CL, 31)), (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL:$amt, 31)), +def : 
Pat<(srl GR16:$src1, (and CL, 31)), (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR32:$src1, (and CL, 31)), (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHR32mCL addr:$dst)>; -def : Pat<(sra GR8:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR8:$src1, (and CL, 31)), (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR16:$src1, (and CL, 31)), (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR32:$src1, (and CL, 31)), (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) @@ -4620,11 +4570,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHRD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) @@ -4645,11 +4595,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHLD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) @@ -4670,11 +4620,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHRD16mrCL addr:$dst, GR16:$src2)>; -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) @@ -4695,11 +4645,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; 
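These rewrites rest on a hardware guarantee worth spelling out: shifts of 8/16/32-bit operands use only the low five bits of CL, so an explicit (and count, 31) adds nothing. A small C++ model of that behaviour (a sketch, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Model the hardware: SHL on a 32-bit operand masks the CL count to 5 bits.
  std::uint32_t shl32(std::uint32_t x, std::uint8_t cl) { return x << (cl & 31); }

  int main() {
    std::uint32_t x = 0x12345678;
    for (unsigned cnt = 0; cnt < 256; ++cnt)
      // The IR-level (shl x, (and cnt, 31)) agrees with the raw instruction,
      // so instruction selection can drop the AND.
      assert((x << (cnt & 31)) == shl32(x, static_cast<std::uint8_t>(cnt)));
  }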
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (anyext (setcc_carry)) -> (setcc_carry) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 89f020c..c8e0723 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; @@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), // Extract / Insert -def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>; -def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; +def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; + def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, @@ -597,13 +599,6 @@ let AddedComplexity = 10 in { (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } -// Patterns to perform vector shuffling with a zeroed out vector. -let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), - (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; -} - // Some special case PANDN patterns. // FIXME: Get rid of these. 
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e26c979..2743dba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { // SSE1 Instructions //===----------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movss is not used for FR32 +// register copies because it's a partial register update; FsMOVAPSrr is +// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movss for copies. +let Constraints = "$src1 = $dst" in +def MOVSSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; + +// Extract the low 32-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr VR128:$src1, + (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (loadf32 addr:$src))]>; + +// MOVSSrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
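// Aside (editorial, not part of the patch): SUBREG_TO_REG differs from
// INSERT_SUBREG in that it asserts the bits outside the inserted subregister
// are already zero instead of preserving them, which is exactly the
// guarantee a movss load from memory provides for lanes 1-3.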
+let AddedComplexity = 20 in { +def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +} + +// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), "cvttss2si\t{$src, $dst|$dst, $src}", @@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; @@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -762,6 +826,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -793,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), let AddedComplexity = 20 in { def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; def : Pat<(v2i64 (movddup 
VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; } @@ -1010,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; // Non-temporal stores -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQ_64mr VR128:$src, addr:$dst)>; + +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; +} + // Load, store, and memory fence def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; @@ -1032,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; -let Predicates = [HasSSE1] in { - def : Pat<(v2i64 immAllZerosV), (V_SET0)>; - def : Pat<(v8i16 immAllZerosV), (V_SET0)>; - def : Pat<(v16i8 immAllZerosV), (V_SET0)>; - def : Pat<(v2f64 immAllZerosV), (V_SET0)>; - def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -} - -// FR32 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector FR32:$src)))]>; -def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; - -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (vector_extract (v4f32 VR128:$src), - (iPTR 0)))]>; -def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store (f32 (vector_extract (v4f32 VR128:$src), - (iPTR 0))), addr:$dst)]>; - - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. 
-let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in - def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; -} +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in -def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; - -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (MOVZSS2PSrm addr:$src)>; +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions //===---------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movsd is not used for FR64 +// register copies because it's a partial register update; FsMOVAPDrr is +// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movsd for copies. +let Constraints = "$src1 = $dst" in +def MOVSDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>; + +// Extract the low 64-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, + (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (loadf64 addr:$src))]>; + +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
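The SUBREG_TO_REG patterns that follow encode a hardware fact: the scalar movsd load writes the low 64 bits of the xmm register and zeroes the upper half, so the vector value really is the zero-extended scalar, and SUBREG_TO_REG (i64 0) is a legitimate way to say so. A scalar model of that guarantee (illustrative; V128 is a stand-in, not an LLVM type):

    #include <cstdint>
    #include <cstring>

    struct V128 { uint64_t lo, hi; }; // stand-in for an xmm register

    // Model of MOVSDrm: load a scalar f64 into lane 0; SSE zeroes bits 127:64.
    V128 movsd_load_model(const double *p) {
      V128 r;
      std::memcpy(&r.lo, p, sizeof(double)); // low 64 bits = loaded scalar
      r.hi = 0;                              // high half cleared by hardware
      return r;
    }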
+let AddedComplexity = 20 in { +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +} + +// Store scalar value to memory. def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), "cvttsd2si\t{$src, $dst|$dst, $src}", @@ -1163,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), Requires<[HasSSE2, OptForSize]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; // Match intrinsics which expect XMM operand(s). def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -1282,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1480,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; @@ -2295,34 +2375,47 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; // Non-temporal stores -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), 
(ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), + (MOVNTDQmr VR128:$src, addr:$dst)>; +} + // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. @@ -2334,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -// FR64 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector FR64:$src)))]>; -def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; - def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2373,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. 
We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (vector_extract (v2f64 VR128:$src), - (iPTR 0)))]>; -def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2403,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. -let Constraints = "$src1 = $dst" in { - let neverHasSideEffects = 1 in - def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; -} - // Store / copy lower 64-bits of a XMM register. def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in { -def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (X86vzmovl (v2f64 (scalar_to_vector - (loadf64 addr:$src))))))]>; - -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>; -} - // movd / movq to XMM register zero-extends let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2613,9 +2651,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -2986,13 +3024,15 @@ let Predicates = [HasSSE2] in { let AddedComplexity = 15 in { // Zeroing a VR128 then do a MOVS{S|D} to the lower bits. 
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>; + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; } // Splat v2f64 / v2i64 @@ -3010,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)), // Special unary SHUFPSrri case. def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; let AddedComplexity = 5 in def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, @@ -3057,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKLPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLDQrr VR128:$src, VR128:$src)>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> @@ -3077,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKHPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHDQrr VR128:$src, VR128:$src)>; } let AddedComplexity = 20 in { @@ -3105,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, 
addr:$src2)>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; -// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) +// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; } // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; // Set lowest element and zero upper elements. let AddedComplexity = 15 in @@ -3185,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), // Use movaps / movups for SSE integer load / store (one byte shorter). 
def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// // SSE4.1 Instructions @@ -3397,7 +3440,7 @@ let Constraints = "$src1 = $dst" in { (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, - (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize; + (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d3b0052..250634f 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit + // Use ## as a comment string so that .s files generated by llvm can go + // through the GCC preprocessor without causing an error. This is needed + // because "clang foo.s" runs the C preprocessor, which is usually reserved + // for .S files on other systems. Perhaps this is because the file system + // wasn't always case preserving or something. 
CommentString = "##"; PCSymbol = "."; @@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; + TextAlignFillValue = 0x90; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; @@ -94,27 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; + TextAlignFillValue = 0x90; } diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 764c87a..3f18696 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -14,53 +14,44 @@ #define DEBUG_TYPE "x86-emitter" #include "X86.h" #include "X86InstrInfo.h" +#include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This should move to a header. 
-namespace llvm { -namespace X86 { -enum Fixups { - reloc_pcrel_word = FirstTargetFixupKind, - reloc_picrel_word, - reloc_absolute_word, - reloc_absolute_word_sext, - reloc_absolute_dword -}; -} -} - namespace { class X86MCCodeEmitter : public MCCodeEmitter { X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT const TargetMachine &TM; const TargetInstrInfo &TII; + MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()) { + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 5; + return 3; } - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } @@ -83,9 +74,11 @@ public: } } - void EmitDisplacementField(const MCOperand &Disp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { @@ -106,8 +99,8 @@ public: void EmitMemModRMByte(const MCInst &MI, unsigned Op, - unsigned RegOpcodeField, intptr_t PCAdj, - unsigned &CurByte, raw_ostream &OS, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -119,13 +112,15 @@ public: MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, false); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); } MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, true); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); } @@ -135,36 +130,59 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? 
MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + void X86MCCodeEmitter:: -EmitDisplacementField(const MCOperand &DispOp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (DispOp.isImm()) { - EmitConstant(DispOp.getImm(), 4, CurByte, OS); + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } -#if 0 - // Otherwise, this is something that requires a relocation. Emit it as such - // now. - unsigned RelocType = Is64BitMode ? - (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); -#endif + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, DispOp.getExpr(), - MCFixupKind(X86::reloc_absolute_word))); - EmitConstant(0, 4, CurByte, OS); + Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); } void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - intptr_t PCAdj, - unsigned &CurByte, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -172,31 +190,48 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - - // FIXME: Eliminate! - bool IsPCRel = false; + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + // Determine whether a SIB byte is needed. // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
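One detail worth pinning down from the rip-relative block above: the CPU resolves disp32(%rip) against the address of the next instruction, so both the standard -4 pc-rel bias and the size of any immediate that trails the displacement must come out of the fixup value. Under those assumptions, the addend attached to a reloc_riprel_4byte fixup works out to:

    // Displacement field is 4 bytes; immBytesAfterDisp is
    // X86II::getSizeOfImm(TSFlags) when the instruction has an immediate,
    // else 0. The fixup must produce
    //   target - (fixup address + 4 + immBytesAfterDisp),
    // hence this bias relative to a plain data fixup.
    int64_t ripRelAddend(int immBytesAfterDisp) {
      return -4 - immBytesAfterDisp;
    }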
+ if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { - if (BaseReg == 0 || // [disp32] in X86-32 mode - BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, true, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - unsigned BaseRegNo = GetX86RegNum(Base); - // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -209,13 +244,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } @@ -270,9 +305,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -280,11 +315,11 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// size, and 3) use of X86-64 extended registers. static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, const TargetInstrDesc &Desc) { - unsigned REX = 0; - - // Pseudo instructions do not need REX prefix byte. + // Pseudo instructions never have a rex byte. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; + + unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; @@ -482,52 +517,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; - assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::RawFrm: { + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. 
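Condensing the base+displacement logic in this hunk (no index register, so no SIB byte): the mod field is chosen from the displacement and the base encoding, with R/M value 5 (EBP/R13) reserved to mean [disp32] when mod is 0. A sketch under those assumptions, not a drop-in for the emitter:

    #include <cstdint>

    // Returns the ModRM mod field: 0 -> [reg], 1 -> [reg+disp8],
    // 2 -> [reg+disp32].
    unsigned pickMod(unsigned baseRegNo, bool dispIsConst, int64_t disp) {
      if (dispIsConst && disp == 0 && baseRegNo != 5) // 5 = EBP/R13
        return 0;                                     // [reg], no displacement
      if (dispIsConst && disp == (int8_t)disp)        // the isDisp8 test
        return 1;                                     // [reg+disp8]
      return 2;                                       // [reg+disp32]
    }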
+ case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); - - if (CurOp == NumOps) - break; - - assert(0 && "Unimpl RawFrm expr"); break; - } - case X86II::AddRegFrm: { + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - unsigned Size = X86II::getSizeOfImm(TSFlags); - EmitConstant(MO1.getImm(), Size, CurByte, OS); - break; - } - - assert(0 && "Unimpl AddRegFrm expr"); break; - } case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), - 0, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcReg: @@ -535,9 +547,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcMem: { @@ -551,117 +560,78 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else AddrOperands = X86AddrNumOperands; - // FIXME: What is this actually doing? - intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86II::getSizeOfImm(TSFlags) : 0; - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: { + case X86II::MRM6r: case X86II::MRM7r: EmitByte(BaseOpcode, CurByte, OS); - - // Special handling of lfence, mfence, monitor, and mwait. - // FIXME: This is terrible, they should get proper encoding bits in TSFlags. - if (Opcode == X86::LFENCE || Opcode == X86::MFENCE || - Opcode == X86::MONITOR || Opcode == X86::MWAIT) { - EmitByte(ModRMByte(3, (TSFlags & X86II::FormMask)-X86II::MRM0r, 0), - CurByte, OS); - - switch (Opcode) { - default: break; - case X86::MONITOR: EmitByte(0xC8, CurByte, OS); break; - case X86::MWAIT: EmitByte(0xC9, CurByte, OS); break; - } - } else { - EmitRegModRMByte(MI.getOperand(CurOp++), - (TSFlags & X86II::FormMask)-X86II::MRM0r, - CurByte, OS); - } - - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - EmitConstant(MO1.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo unimpl"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 
- if (MO1.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO1, TM); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); break; -#endif - } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = 0; - if (CurOp + X86AddrNumOperands != NumOps) { - if (MI.getOperand(CurOp+X86AddrNumOperands).isImm()) - PCAdj = X86II::getSizeOfImm(TSFlags); - else - PCAdj = 4; - } - + case X86II::MRM6m: case X86II::MRM7m: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands; - - if (CurOp == NumOps) - break; - - const MCOperand &MO = MI.getOperand(CurOp++); - if (MO.isImm()) { - EmitConstant(MO.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo not handled"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? - if (MO.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO, TM); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); -#endif + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); break; } - } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); #ifndef NDEBUG // FIXME: Verify. 
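The MRM_C1 through MRM_F9 cases added above all reduce to "emit the opcode byte, then one fixed byte in the ModRM position", which is what lets the patch delete the ad-hoc special casing of lfence/mfence/monitor/mwait. For reference, the encodings this produces (opcode bytes per the Intel tables; the helper is a sketch of the shared case body, reusing EmitByte as defined above):

    // monitor -> 0F 01 C8   (TB prefix supplies 0F, BaseOpcode 0x01, MRM_C8)
    // mwait   -> 0F 01 C9   (MRM_C9)
    // lfence  -> 0F AE E8   (BaseOpcode 0xAE, MRM_E8)
    // mfence  -> 0F AE F0   (MRM_F0)
    static void emitFixedForm(uint8_t baseOpcode, uint8_t fixedByte,
                              unsigned &curByte, raw_ostream &os) {
      EmitByte(baseOpcode, curByte, os); // opcode, after any prefixes
      EmitByte(fixedByte, curByte, os);  // constant byte in the ModRM slot
    }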
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index bb53bf1..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,10 +35,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; @@ -66,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -76,7 +65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -91,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 081c6d9..0f4ce37 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR32RegClass) { + return A; } break; case 2: @@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR64RegClass) { + return A; } break; case 3: @@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; + } else if (B == &X86::VR128RegClass) { + return A; } break; case 4: @@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); bool requiresRealignment = - RealignStack && (MFI->getMaxAlignment() > StackAlign); + RealignStack && ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. 
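The needsStackRealignment change in the X86RegisterInfo.cpp hunk above adds a second trigger: realignment is requested either because some frame object wants more than the ABI stack alignment or because the function carries an explicit StackAlignment attribute. Distilled into one predicate (same logic, illustrative form):

    bool needsRealignment(bool realignStackEnabled, unsigned maxObjectAlign,
                          unsigned abiStackAlign, bool fnHasStackAlignAttr) {
      return realignStackEnabled &&
             (maxObjectAlign > abiStackAlign || fnHasStackAlignAttr);
    }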
@@ -485,7 +493,7 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -627,10 +635,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1053,7 +1057,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1064,7 +1069,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8fb5e92..e4bdb4e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -35,7 +35,8 @@ namespace X86 { /// these indices must be kept in sync with the class indices in the /// X86RegisterInfo.td file. 
enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4 + SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, + SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 }; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 1559bf7..ed2ce6c 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -158,22 +158,22 @@ let Namespace = "X86" in { def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; // YMM Registers, used by AVX instructions - def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; - def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; - def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; - def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; - def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; - def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; - def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; - def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; - def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; - def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; - def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; - def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; - def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; - def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; - def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; - def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; + def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; + def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; + def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>; + def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>; + def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>; + def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>; + def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>; + def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>; + def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>; + def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>; + def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>; + def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>; + def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; + def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; + def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; def x86_subreg_16bit : PatLeaf<(i32 3)>; def x86_subreg_32bit : PatLeaf<(i32 4)>; +def x86_subreg_ss : PatLeaf<(i32 1)>; +def x86_subreg_sd : PatLeaf<(i32 2)>; +def x86_subreg_xmm : PatLeaf<(i32 3)>; + def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, @@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, 
YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; +def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { + let SubRegClassList = [FR32, FR64]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, - YMM12, YMM13, YMM14, YMM15]>; + YMM12, YMM13, YMM14, YMM15]> { + let SubRegClassList = [FR32, FR64, VR128]; +} // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..594a470 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -20,9 +20,9 @@ namespace llvm { class GlobalValue; class TargetMachine; - + /// PICStyles - The X86 backend supports a number of different styles of PIC. -/// +/// namespace PICStyles { enum Style { StubPIC, // Used on i386-darwin in -fPIC mode. @@ -46,7 +46,7 @@ protected: /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; - + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or /// none supported. X86SSEEnum X86SSELevel; @@ -58,7 +58,7 @@ protected: /// HasCMov - True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; - + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -78,8 +78,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; - /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. - /// This may require setting a feature bit in the processor. + /// HasVectorUAMem - True if SIMD operations can have unaligned memory + /// operands. This may require setting a feature bit in the + /// processor. 
bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric @@ -150,20 +151,20 @@ public: bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } - + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } bool isTargetCygwin() const { return TargetType == isCygwin; } bool isTargetCygMing() const { return TargetType == isMingw || TargetType == isCygwin; } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { return TargetType == isMingw || TargetType == isCygwin || TargetType == isWindows; } - + bool isTargetWin64() const { return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } @@ -175,7 +176,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; @@ -196,11 +197,11 @@ public: bool isPICStyleStubAny() const { return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f835e29..56ddaf8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,11 +47,16 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - // FIXME: Remove the heinous one when the new one works. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createHeinousX86MCCodeEmitter); + createX86_32MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createHeinousX86MCCodeEmitter); + createX86_64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterAsmBackend(TheX86_64Target, + createX86_64AsmBackend); } @@ -201,32 +205,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. 
-/// There needs to be some way to signify there is none. The LSDA is encoded as
-/// pc-rel. But you don't look for some magic value after adding the pc. You
-/// have to look for a zero before adding the pc. The problem is that the size
-/// of the zero to look for depends on the encoding. The unwinder bug in SL is
-/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8
-/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are
-/// non-zero so it goes ahead and then reads the value based on the encoding.
-/// But if you use sdata4 and there is no LSDA, then the test for zero gives a
-/// false negative and the unwinder thinks there is an LSDA.
-///
-/// FIXME: This call-back isn't good! We should be using the correct encoding
-/// regardless of the system. However, there are some systems which have bugs
-/// that prevent this from occuring.
-DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const {
-  if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10)
-    return DwarfLSDAEncoding::Default;
-
-  return DwarfLSDAEncoding::EightByte;
-}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index eee29be..2bb5454 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -62,18 +62,6 @@ public:
     return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
   }
 
-  /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are
-  /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that
-  /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded
-  /// as a 4-byte pointer by default. However, some systems may require a
-  /// different size due to bugs or other conditions. We will default to a
-  /// 4-byte encoding unless the system tells us otherwise.
-  ///
-  /// FIXME: This call-back isn't good! We should be using the correct encoding
-  /// regardless of the system. However, there are some systems which have bugs
-  /// that prevent this from occuring.
-  virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const;
-
   // Set up the pass pipeline.
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index b8cef7d..29a0be5 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -7,61 +7,112 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86TargetObjectFile.h"
 #include "X86MCTargetExpr.h"
+#include "X86TargetObjectFile.h"
+#include "X86TargetMachine.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
 using namespace llvm;
+using namespace dwarf;
 
-const MCExpr *X8632_MachoTargetObjectFile::
+const MCExpr *X8664_MachoTargetObjectFile::
 getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                 MachineModuleInfo *MMI,
-                                 bool &IsIndirect, bool &IsPCRel) const {
-  // The mach-o version of this method defaults to returning a stub reference.
-  IsIndirect = true;
-  IsPCRel    = false;
-
-
-  MachineModuleInfoMachO &MachOMMI =
-    MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
-  // FIXME: Use GetSymbolWithGlobalValueBase.
- SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); + MachineModuleInfo *MMI, unsigned Encoding) const { + + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which + // is an indirect pc-relative reference. + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } -const MCExpr *X8664_MachoTargetObjectFile:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - - // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which - // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - const MCSymbol *Sym = getContext().CreateSymbol(Name); - const MCExpr *Res = - X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
+                             DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+  if (Model == CodeModel::Small)
+    return DW_EH_PE_udata4;
+
+  return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
+  CodeModel::Model Model = TM.getCodeModel();
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+                             Model == CodeModel::Medium ?
+                             DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+  if (Model == CodeModel::Small || Model == CodeModel::Medium)
+    return DW_EH_PE_udata4;
+
+  return DW_EH_PE_absptr;
 }
 
+unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
+  CodeModel::Model Model = TM.getCodeModel();
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+                                                 Model == CodeModel::Medium ?
+                                                 DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+  if (Model == CodeModel::Small)
+    return DW_EH_PE_udata4;
+
+  return DW_EH_PE_absptr;
+}
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 377a93b..0444417 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -10,21 +10,13 @@
 #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
 #define LLVM_TARGET_X86_TARGETOBJECTFILE_H
 
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
 namespace llvm {
-
-  /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
-  /// Darwin/x86-32.
-  class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
-  public:
-
-    virtual const MCExpr *
-    getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                     MachineModuleInfo *MMI,
-                                     bool &IsIndirect, bool &IsPCRel) const;
-  };
-
+  class X86TargetMachine;
+
   /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
   /// Darwin/x86-64.
   class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
@@ -32,9 +24,31 @@ namespace llvm {
 
     virtual const MCExpr *
     getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                     MachineModuleInfo *MMI,
-                                     bool &IsIndirect, bool &IsPCRel) const;
+                                     MachineModuleInfo *MMI, unsigned Encoding) const;
+  };
+
+  class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+    const X86TargetMachine &TM;
+  public:
+    X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
+      :TM(tm) { }
+    virtual unsigned getPersonalityEncoding() const;
+    virtual unsigned getLSDAEncoding() const;
+    virtual unsigned getFDEEncoding() const;
+    virtual unsigned getTTypeEncoding() const;
+  };
+
+  class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+    const X86TargetMachine &TM;
+  public:
+    X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
+      :TM(tm) { }
+    virtual unsigned getPersonalityEncoding() const;
+    virtual unsigned getLSDAEncoding() const;
+    virtual unsigned getFDEEncoding() const;
+    virtual unsigned getTTypeEncoding() const;
   };
+
 } // end namespace llvm
 
 #endif
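Note on the encoding hooks in this commit: the ELF getPersonalityEncoding/getLSDAEncoding/getFDEEncoding/getTTypeEncoding overrides all follow one pattern -- the relocation model picks the DW_EH_PE application bits (indirect, pc-relative) and the code model picks the width bits (4 vs. 8 bytes). What follows is a minimal standalone C++ sketch of that bit arithmetic, assuming only the standard DWARF exception-header constants; the enum names, helper, and main() below are illustrative stand-ins, not part of this patch or of LLVM's API.

// sketch.cpp -- illustrative only; mirrors the decision shape of the
// X8664_ELFTargetObjectFile encoding hooks in the patch above.
#include <cstdio>

// Standard DWARF exception-header encoding constants (values per the
// DWARF EH spec; local stand-ins for the llvm::dwarf definitions).
enum {
  DW_EH_PE_absptr   = 0x00,  // raw, pointer-sized value
  DW_EH_PE_udata4   = 0x03,  // unsigned 4-byte value
  DW_EH_PE_sdata4   = 0x0B,  // signed 4-byte value
  DW_EH_PE_sdata8   = 0x0C,  // signed 8-byte value
  DW_EH_PE_pcrel    = 0x10,  // value is relative to its own location
  DW_EH_PE_indirect = 0x80   // value is the address of the real value
};

enum RelocModel { RelocStatic, RelocPIC };
enum CodeModel  { CodeSmall, CodeMedium, CodeLarge };

// Same shape as getTTypeEncoding() for x86-64 ELF in the patch: PIC gets
// an indirect pc-relative reference; small/medium code models fit the
// offset in 4 signed bytes, anything larger needs 8.
unsigned ttypeEncoding(RelocModel RM, CodeModel CM) {
  if (RM == RelocPIC)
    return DW_EH_PE_indirect | DW_EH_PE_pcrel |
           (CM == CodeSmall || CM == CodeMedium ? DW_EH_PE_sdata4
                                                : DW_EH_PE_sdata8);
  if (CM == CodeSmall)
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

int main() {
  unsigned Enc = ttypeEncoding(RelocPIC, CodeSmall);
  std::printf("PIC/small:    0x%02x\n", Enc);                // 0x9b
  std::printf("static/large: 0x%02x\n",
              ttypeEncoding(RelocStatic, CodeLarge));        // 0x00
  // The rewritten Darwin/x86-64 getSymbolForDwarfGlobalReference() keys
  // off the same two bits before emitting the foo@GOTPCREL+4 form.
  if (Enc & (DW_EH_PE_indirect | DW_EH_PE_pcrel))
    std::printf("emit an indirect pc-relative reference\n");
  return 0;
}

Under PIC, the indirect pc-relative forms keep the exception tables free of load-time relocations, while the code model only decides whether a signed 4-byte offset can span the required distance. The composed byte for PIC/small, 0x9b (DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4), is the personality/ttype encoding commonly seen in x86-64 ELF CIEs, which is also why removing the old Darwin-specific getLSDAEncoding() callback in favor of these per-table hooks is a net simplification.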