summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp17
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp27
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp21
-rw-r--r--lib/Target/X86/X86JITInfo.cpp5
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp16
-rw-r--r--test/Makefile6
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp157
-rw-r--r--utils/lit/TestFormats.py5
8 files changed, 187 insertions, 67 deletions
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index a85e11e..5f195ee 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -247,16 +247,6 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
- /// AddCallbackAtLocation - If the target is capable of rewriting an
- /// instruction without the use of a stub, record the location of the use so
- /// we know which function is being used at the location.
- void *AddCallbackAtLocation(Function *F, void *Location) {
- MutexGuard locked(TheJIT->lock);
- /// Get the target-specific JIT resolver function.
- state.AddCallSite(locked, Location, F);
- return (void*)(intptr_t)LazyResolverFn;
- }
-
void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs);
@@ -756,13 +746,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
!MayNeedFarStub)
return TheJIT->getPointerToFunction(F);
- // Okay, the function has not been compiled yet, if the target callback
- // mechanism is capable of rewriting the instruction directly, prefer to do
- // that instead of emitting a stub. This uses the lazy resolver, so is not
- // legal if lazy compilation is disabled.
- if (!MayNeedFarStub && TheJIT->isCompilingLazily())
- return Resolver.AddCallbackAtLocation(F, Reference);
-
// Otherwise, we have to emit a stub.
void *StubAddr = Resolver.getFunctionStub(F);
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ab49b9e..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@ namespace {
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool MayNeedFarStub = false, bool Indirect = false);
+ bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
- bool MayNeedFarStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, MayNeedFarStub)
+ GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, MayNeedFarStub);
+ GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool MayNeedFarStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, MayNeedFarStub, Indirect);
+ Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool MayNeedFarStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, MayNeedFarStub);
+ MO.getOffset(), 0);
break;
}
@@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dacb2c3..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1937,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool WasGlobalOrExternal = false;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@@ -1967,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1984,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (isTailCall) {
+ }
+
+ if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
- if (!isStub)
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 5d58a87..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -185,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86CodeEmitterPass(*this, MCE));
@@ -211,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86JITCodeEmitterPass(*this, JCE));
diff --git a/test/Makefile b/test/Makefile
index 1387d56..1c8feba 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -77,12 +77,12 @@ endif
# Both AuroraUX & Solaris do not have the -m flag for ulimit
ifeq ($(HOST_OS),SunOS)
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ;
else # !SunOS
ifeq ($(HOST_OS),AuroraUX)
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ;
else # !AuroraUX
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 512000 ;
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ;
endif # AuroraUX
endif # SunOS
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 98b2922..b0c2f24 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -26,10 +26,22 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelect.h"
#include "llvm/Type.h"
#include <vector>
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if _POSIX_MAPPED_FILES > 0
+#include <sys/mman.h>
+#endif
using namespace llvm;
@@ -177,6 +189,15 @@ public:
}
};
+void LoadAssemblyInto(Module *M, const char *assembly) {
+ SMDiagnostic Error;
+ bool success = NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
+ std::string errMsg;
+ raw_string_ostream os(errMsg);
+ Error.Print("", os);
+ ASSERT_TRUE(success) << os.str();
+}
+
class JITTest : public testing::Test {
protected:
virtual void SetUp() {
@@ -191,12 +212,7 @@ class JITTest : public testing::Test {
}
void LoadAssembly(const char *assembly) {
- SMDiagnostic Error;
- bool success = NULL != ParseAssemblyString(assembly, M, Error, Context);
- std::string errMsg;
- raw_string_ostream os(errMsg);
- Error.Print("", os);
- ASSERT_TRUE(success) << os.str();
+ LoadAssemblyInto(M, assembly);
}
LLVMContext Context;
@@ -498,6 +514,135 @@ TEST_F(JITTest, NoStubs) {
}
#endif
+#if _POSIX_MAPPED_FILES > 0 && (defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64))
+class FarCallMemMgr : public RecordingJITMemoryManager {
+ void *MmapRegion;
+ size_t MmapSize;
+ uint8_t *NextStub;
+ uint8_t *NextFunction;
+
+ public:
+ FarCallMemMgr()
+ : MmapSize(16ULL << 30) { // 16GB
+ MmapRegion = mmap(NULL, MmapSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (MmapRegion == MAP_FAILED) {
+ ADD_FAILURE() << "mmap failed: " << strerror(errno);
+ }
+ // Set up the 16GB mapped region in several chunks:
+ // Stubs / ~5GB empty space / Function 1 / ~5GB empty space / Function 2
+ // This way no two entities can use a 32-bit relative call to reach each other.
+ NextStub = static_cast<uint8_t*>(MmapRegion);
+ NextFunction = NextStub + (5ULL << 30);
+
+ // Next, poison some of the memory so a wild call will eventually crash,
+ // even if memory was initialized by the OS to 0. We can't poison all of
+ // the memory because we want to be able to run on systems with less than
+ // 16GB of physical ram.
+ int TrapInstr = 0xCC; // INT 3
+ memset(NextStub, TrapInstr, 1<<10);
+ for (size_t Offset = 1<<30; Offset < MmapSize; Offset += 1<<30) {
+ // Fill the 2KB around each GB boundary with trap instructions. This
+ // should ensure that we can't run into emitted functions without hitting
+ // the trap.
+ memset(NextStub + Offset - (1<<10), TrapInstr, 2<<10);
+ }
+ }
+
+ ~FarCallMemMgr() {
+ EXPECT_EQ(0, munmap(MmapRegion, MmapSize));
+ }
+
+ virtual void setMemoryWritable() {}
+ virtual void setMemoryExecutable() {}
+ virtual uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) {
+ ActualSize = 1 << 30;
+ uint8_t *Result = NextFunction;
+ NextFunction += 5ULL << 30;
+ return Result;
+ }
+ virtual void endFunctionBody(const Function*, uint8_t*, uint8_t*) {}
+ virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ NextStub = reinterpret_cast<uint8_t*>(
+ uintptr_t(NextStub + Alignment - 1) &~ uintptr_t(Alignment - 1));
+ uint8_t *Result = NextStub;
+ NextStub += StubSize;
+ return Result;
+ }
+};
+
+class FarTargetTest : public ::testing::TestWithParam<CodeGenOpt::Level> {
+ protected:
+ FarTargetTest() : SavedCodeModel(TargetMachine::getCodeModel()) {}
+ ~FarTargetTest() {
+ TargetMachine::setCodeModel(SavedCodeModel);
+ }
+
+ const CodeModel::Model SavedCodeModel;
+};
+INSTANTIATE_TEST_CASE_P(CodeGenOpt,
+ FarTargetTest,
+ ::testing::Values(CodeGenOpt::None,
+ CodeGenOpt::Default));
+
+TEST_P(FarTargetTest, CallToFarTarget) {
+ // x86-64 can only make direct calls to functions within 32 bits of
+ // the current PC. To call anything farther away, we have to load
+ // the address into a register and call through the register. The
+ // old JIT did this by allocating a stub for any far call. However,
+ // that stub needed to be within 32 bits of the callsite. Here we
+ // test that the JIT correctly deals with stubs and calls more than
+ // 32 bits away from the callsite.
+
+ // Make sure the code generator is assuming code might be far away.
+ //TargetMachine::setCodeModel(CodeModel::Large);
+
+ LLVMContext Context;
+ Module *M = new Module("<main>", Context);
+ ExistingModuleProvider *MP = new ExistingModuleProvider(M);
+
+ JITMemoryManager *MemMgr = new FarCallMemMgr();
+ std::string Error;
+ OwningPtr<ExecutionEngine> JIT(EngineBuilder(MP)
+ .setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setJITMemoryManager(MemMgr)
+ .setOptLevel(GetParam())
+ .create());
+ ASSERT_EQ(Error, "");
+ TargetMachine::setCodeModel(CodeModel::Large);
+
+ LoadAssemblyInto(M,
+ "define i32 @test() { "
+ " ret i32 7 "
+ "} "
+ " "
+ "define i32 @test_far() { "
+ " %result = call i32 @test() "
+ " ret i32 %result "
+ "} ");
+ // First, lay out a function early in memory.
+ Function *TestFunction = M->getFunction("test");
+ int32_t (*TestFunctionPtr)() = reinterpret_cast<int32_t(*)()>(
+ (intptr_t)JIT->getPointerToFunction(TestFunction));
+ ASSERT_EQ(7, TestFunctionPtr());
+
+ // We now lay out the far-away function. This should land >4GB away from test().
+ Function *FarFunction = M->getFunction("test_far");
+ int32_t (*FarFunctionPtr)() = reinterpret_cast<int32_t(*)()>(
+ (intptr_t)JIT->getPointerToFunction(FarFunction));
+
+ EXPECT_LT(1LL << 32, llabs(intptr_t(FarFunctionPtr) - intptr_t(TestFunctionPtr)))
+ << "Functions must be >32 bits apart or the test is meaningless.";
+
+ // This used to result in a segfault in FarFunction, when its call instruction
+ // jumped to the wrong address.
+ EXPECT_EQ(7, FarFunctionPtr());
+}
+#endif // Platform has far-call problem.
+
// This code is copied from JITEventListenerTest, but it only runs once for all
// the tests in this directory. Everything seems fine, but that's strange
// behavior.
diff --git a/utils/lit/TestFormats.py b/utils/lit/TestFormats.py
index cc40a30..f067bae 100644
--- a/utils/lit/TestFormats.py
+++ b/utils/lit/TestFormats.py
@@ -53,8 +53,9 @@ class GoogleTest(object):
def execute(self, test, litConfig):
testPath,testName = os.path.split(test.getSourcePath())
- if not os.path.exists(testPath):
- # Handle GTest typed tests, whose name includes a '/'.
+ while not os.path.exists(testPath):
+ # Handle GTest parametrized and typed tests, whose name includes
+ # some '/'s.
testPath, namePrefix = os.path.split(testPath)
testName = os.path.join(namePrefix, testName)