diff options
author | Andreas Gampe <agampe@google.com> | 2014-06-18 17:01:15 -0700 |
---|---|---|
committer | Andreas Gampe <agampe@google.com> | 2014-06-18 17:11:51 -0700 |
commit | 7cd26f355ba83be75b72ed628ed5ee84a3245c4f (patch) | |
tree | 94152cdd06143bec8c5491dba354cb78214b48c3 /runtime | |
parent | 0c29909cbde112bc9c04da4ce81421e1a0b39f36 (diff) | |
download | art-7cd26f355ba83be75b72ed628ed5ee84a3245c4f.zip art-7cd26f355ba83be75b72ed628ed5ee84a3245c4f.tar.gz art-7cd26f355ba83be75b72ed628ed5ee84a3245c4f.tar.bz2 |
ART: Target-dependent stack overflow, less check elision
Refactor the separate stack overflow reserved sizes from thread.h
into instruction_set.h and make sure they're used in the compiler.
Refactor the decision on when to elide stack overflow checks:
especially with large interpreter stack frames, it is not a good
idea to elide checks when the frame size is even close to the
reserved size. Currently we enforce checks when the frame size is
>= 2KB, but ensure that frame sizes of 1KB and below still elide
the checks (thresholds chosen from experience).
Bug: 15728765
Change-Id: I016bfd3d8218170cbccbd123ed5e2203db167c06
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/arch/arm/fault_handler_arm.cc | 5 | ||||
-rw-r--r-- | runtime/instruction_set.h | 55 | ||||
-rw-r--r-- | runtime/thread.cc | 6 | ||||
-rw-r--r-- | runtime/thread.h | 25 |
4 files changed, 51 insertions, 40 deletions
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc index f81e2f9..2a82129 100644 --- a/runtime/arch/arm/fault_handler_arm.cc +++ b/runtime/arch/arm/fault_handler_arm.cc @@ -22,6 +22,7 @@ #include "globals.h" #include "base/logging.h" #include "base/hex_dump.h" +#include "instruction_set.h" #include "mirror/art_method.h" #include "mirror/art_method-inl.h" #include "thread.h" @@ -59,7 +60,7 @@ void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** // get the method from the top of the stack. However it's in r0. uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address); uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>( - reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes); + reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes); if (overflow_addr == fault_addr) { *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0); } else { @@ -190,7 +191,7 @@ bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) { VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp << ", fault_addr: " << fault_addr; - uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes; + uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes; Thread* self = reinterpret_cast<Thread*>(sc->arm_r9); CHECK_EQ(self, Thread::Current()); diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h index 67e7100..96eeb8d 100644 --- a/runtime/instruction_set.h +++ b/runtime/instruction_set.h @@ -22,6 +22,7 @@ #include "base/logging.h" // Logging is required for FATAL in the helper functions. #include "base/macros.h" +#include "globals.h" // For KB. 
namespace art { @@ -36,6 +37,20 @@ enum InstructionSet { }; std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs); +#if defined(__arm__) +static constexpr InstructionSet kRuntimeISA = kArm; +#elif defined(__aarch64__) +static constexpr InstructionSet kRuntimeISA = kArm64; +#elif defined(__mips__) +static constexpr InstructionSet kRuntimeISA = kMips; +#elif defined(__i386__) +static constexpr InstructionSet kRuntimeISA = kX86; +#elif defined(__x86_64__) +static constexpr InstructionSet kRuntimeISA = kX86_64; +#else +static constexpr InstructionSet kRuntimeISA = kNone; +#endif + // Architecture-specific pointer sizes static constexpr size_t kArmPointerSize = 4; static constexpr size_t kArm64PointerSize = 8; @@ -153,19 +168,33 @@ static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) { } } -#if defined(__arm__) -static constexpr InstructionSet kRuntimeISA = kArm; -#elif defined(__aarch64__) -static constexpr InstructionSet kRuntimeISA = kArm64; -#elif defined(__mips__) -static constexpr InstructionSet kRuntimeISA = kMips; -#elif defined(__i386__) -static constexpr InstructionSet kRuntimeISA = kX86; -#elif defined(__x86_64__) -static constexpr InstructionSet kRuntimeISA = kX86_64; -#else -static constexpr InstructionSet kRuntimeISA = kNone; -#endif +static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB; +static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes; +static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes; + +// TODO: shrink reserved space, in particular for 64bit. + +// Worst-case, we would need about 2.6x the amount of x86_64 for many more registers. +// But this one works rather well. 
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB; +// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix: +// test-art-host-run-test-interpreter-018-stack-overflow +// test-art-host-run-test-interpreter-107-int-math2 +static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB; +static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB; + +static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) { + return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes : + isa == kArm64 ? kArm64StackOverflowReservedBytes : + isa == kMips ? kMipsStackOverflowReservedBytes : + isa == kX86 ? kX86StackOverflowReservedBytes : + isa == kX86_64 ? kX86_64StackOverflowReservedBytes : + isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) : + (LOG(FATAL) << "Unknown instruction set" << isa, 0); +} + +static constexpr size_t kRuntimeStackOverflowReservedBytes = + GetStackOverflowReservedBytes(kRuntimeISA); enum InstructionFeatures { kHwDiv = 0x1, // Supports hardware divide. diff --git a/runtime/thread.cc b/runtime/thread.cc index 6980530..3f8f4a3 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -220,7 +220,7 @@ static size_t FixStackSize(size_t stack_size) { // It's likely that callers are trying to ensure they have at least a certain amount of // stack space, so we should add our reserved space on top of what they requested, rather // than implicitly take it away from them. - stack_size += Thread::kStackOverflowReservedBytes; + stack_size += kRuntimeStackOverflowReservedBytes; } else { // If we are going to use implicit stack checks, allocate space for the protected // region at the bottom of the stack. 
@@ -489,7 +489,7 @@ void Thread::InitStackHwm() { tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base); tlsPtr_.stack_size = read_stack_size; - if (read_stack_size <= kStackOverflowReservedBytes) { + if (read_stack_size <= kRuntimeStackOverflowReservedBytes) { LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size << " bytes)"; } @@ -2200,7 +2200,7 @@ void Thread::SetStackEndForStackOverflow() { if (tlsPtr_.stack_end == tlsPtr_.stack_begin) { // However, we seem to have already extended to use the full stack. LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently " - << kStackOverflowReservedBytes << ")?"; + << kRuntimeStackOverflowReservedBytes << ")?"; DumpStack(LOG(ERROR)); LOG(FATAL) << "Recursive stack overflow."; } diff --git a/runtime/thread.h b/runtime/thread.h index bff9b52..7cd86de 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -33,6 +33,7 @@ #include "gc/allocator/rosalloc.h" #include "globals.h" #include "handle_scope.h" +#include "instruction_set.h" #include "jvalue.h" #include "object_callbacks.h" #include "offsets.h" @@ -94,28 +95,8 @@ enum ThreadFlag { class Thread { public: - // Space to throw a StackOverflowError in. - // TODO: shrink reserved space, in particular for 64bit. -#if defined(__x86_64__) - static constexpr size_t kStackOverflowReservedBytes = 32 * KB; -#elif defined(__aarch64__) - // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers. - // But this one works rather well. 
- static constexpr size_t kStackOverflowReservedBytes = 32 * KB; -#elif defined(__i386__) - // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix: - // test-art-host-run-test-interpreter-018-stack-overflow - // test-art-host-run-test-interpreter-107-int-math2 - static constexpr size_t kStackOverflowReservedBytes = 24 * KB; -#else - static constexpr size_t kStackOverflowReservedBytes = 16 * KB; -#endif // How much of the reserved bytes is reserved for incoming signals. static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB; - // How much of the reserved bytes we may temporarily use during stack overflow checks as an - // optimization. - static constexpr size_t kStackOverflowReservedUsableBytes = - kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes; // For implicit overflow checks we reserve an extra piece of memory at the bottom // of the stack (lowest memory). The higher portion of the memory @@ -123,7 +104,7 @@ class Thread { // throwing the StackOverflow exception. static constexpr size_t kStackOverflowProtectedSize = 16 * KB; static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize + - kStackOverflowReservedBytes; + kRuntimeStackOverflowReservedBytes; // Creates a new native thread corresponding to the given managed peer. // Used to implement Thread.start. @@ -585,7 +566,7 @@ class Thread { // overflow region. tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize; } else { - tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes; + tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes; } } |