From 7f57e8c60ec31461151a8bfdd2b3fabfa78cb3f5 Mon Sep 17 00:00:00 2001
From: Andreas Gampe
Date: Mon, 26 Oct 2015 20:47:28 -0700
Subject: [WIP] ART: Write-protect TLS

Change-Id: I6762a3a30d01bd6eb8bb25f23f390c91147fe9b4
---
 build/Android.common_build.mk                  |   2 +-
 compiler/utils/arm64/assembler_arm64.cc        |   2 +-
 runtime/arch/x86/thread_x86.cc                 |   2 +-
 runtime/asm_support.h                          |  15 ++-
 runtime/base/mutex-inl.h                       |   4 +-
 .../quick/quick_trampoline_entrypoints.cc      |   1 +
 runtime/mem_map.cc                             |  31 ++++---
 runtime/thread.cc                              | 102 +++++++++++++++++++--
 runtime/thread.h                               |  74 ++++++++-------
 runtime/thread_list.cc                         |   2 +-
 10 files changed, 171 insertions(+), 64 deletions(-)

diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index b84154b..fabaaec 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -233,7 +233,7 @@ art_non_debug_cflags := \
 
 # Cflags for debug ART and ART tools.
 art_debug_cflags := \
-  -O2 \
+  -O0 \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
   -DVIXL_DEBUG \
   -UNDEBUG
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 7d98a30..f068571 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -526,7 +526,7 @@ void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister
   CHECK(scratch.IsXRegister()) << scratch;
   // Remove base and scratch form the temp list - higher level API uses IP1, IP0.
   vixl::UseScratchRegisterScope temps(vixl_masm_);
-  temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
+  temps.Exclude(reg_x(base.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
 }
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index b97c143..56b0b79 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -137,7 +137,7 @@ void Thread::InitCpu() {
 }
 
 void Thread::CleanupCpu() {
-  MutexLock mu(this, *Locks::modify_ldt_lock_);
+  MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 10ed0f4..69b26eb 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,11 @@
 ADD_TEST_EQ(THREAD_ID_OFFSET,
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 128
+#if defined(__LP64__)
+#define THREAD_CARD_TABLE_OFFSET 160
+#else
+#define THREAD_CARD_TABLE_OFFSET 144
+#endif
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
@@ -104,11 +108,16 @@ ADD_TEST_EQ(THREAD_TOP_QUICK_FRAME_OFFSET,
             art::Thread::TopOfManagedStackOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.self.
-#define THREAD_SELF_OFFSET (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
+#if defined(__LP64__)
+#define THREAD_SELF_OFFSET 2112
+#else
+#define THREAD_SELF_OFFSET 1120
+#endif
+// (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 147 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 31 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 87840e7..2ae3b19 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -74,7 +74,9 @@ static inline void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALYSIS {
           // Ignore logging which may or may not have set up thread data structures.
           level == kLoggingLock ||
           // Avoid recursive death.
-          level == kAbortLock) << level;
+          level == kAbortLock ||
+          // A MemMap may be created for thread objects
+          level == kMemMapsLock) << level;
   }
 }
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 2ea5cb0..73db6ba 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1949,6 +1949,7 @@ static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      ArtMethod* caller_method, Thread* self, ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+  DCHECK(caller_method != nullptr);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6566060..e13822a 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -511,26 +511,29 @@ MemMap::~MemMap() {
   if (base_begin_ == nullptr && base_size_ == 0) {
     return;
   }
+
+  // Remove it from maps_.
+  {
+    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
+    bool found = false;
+    DCHECK(maps_ != nullptr);
+    for (auto it = maps_->lower_bound(base_begin_), end = maps_->end();
+         it != end && it->first == base_begin_; ++it) {
+      if (it->second == this) {
+        found = true;
+        maps_->erase(it);
+        break;
+      }
+    }
+    CHECK(found) << "MemMap not found";
+  }
+
   if (!reuse_) {
     int result = munmap(base_begin_, base_size_);
     if (result == -1) {
       PLOG(FATAL) << "munmap failed";
     }
   }
-
-  // Remove it from maps_.
-  MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-  bool found = false;
-  DCHECK(maps_ != nullptr);
-  for (auto it = maps_->lower_bound(base_begin_), end = maps_->end();
-       it != end && it->first == base_begin_; ++it) {
-    if (it->second == this) {
-      found = true;
-      maps_->erase(it);
-      break;
-    }
-  }
-  CHECK(found) << "MemMap not found";
 }
 
 MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6e8f89c..a9173d5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -96,9 +96,11 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
-  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.interpreter_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
-      sizeof(tlsPtr_.quick_entrypoints));
+  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.quick_entrypoints);
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(&tlsPtr_.interpreter_entrypoints) +
+      sizeof(tlsPtr_.interpreter_entrypoints));
+  DCHECK_LT(begin, end);
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
@@ -106,7 +108,90 @@ void Thread::InitTlsEntryPoints() {
                   &tlsPtr_.quick_entrypoints);
 }
 
+static constexpr bool kUseWriteProtectScheme = true;
+
+static size_t GetProtectionOffset() {
+  return RoundUp(QUICK_ENTRYPOINT_OFFSET(sizeof(void*), pInstanceofNonTrivial).Uint32Value(), 16);
+}
+
+// Allocate a thread. This might do some magic to use two pages.
+Thread* Thread::AllocateThread(bool is_daemon) {
+  if (!kUseWriteProtectScheme) {
+    return new Thread(is_daemon);
+  }
+
+  std::string error_msg;
+  MemMap* mem_map = MemMap::MapAnonymous("some thread",
+                                         nullptr,
+                                         2 * kPageSize,
+                                         PROT_READ | PROT_WRITE,
+                                         false,
+                                         false,
+                                         &error_msg);
+  if (mem_map == nullptr) {
+    PLOG(FATAL) << error_msg;
+  }
+
+  uint8_t* second_page_address = mem_map->Begin() + kPageSize;
+  const uint32_t offset = GetProtectionOffset();
+  uintptr_t start_address = reinterpret_cast<uintptr_t>(second_page_address) - offset;
+  DCHECK_GE(start_address, reinterpret_cast<uintptr_t>(mem_map->Begin()) + sizeof(void*));
+  void* start_address_ptr = reinterpret_cast<void*>(start_address);
+  Thread* t = new (start_address_ptr) Thread(is_daemon);
+
+  // Store a pointer to the MemMap at the bottom.
+  *reinterpret_cast<MemMap**>(mem_map->Begin()) = mem_map;
+
+  return t;
+}
+
+static void ProtectThread(Thread* thread) {
+  if (!kUseWriteProtectScheme) {
+    return;
+  }
+
+  uintptr_t thread_addr = reinterpret_cast<uintptr_t>(thread);
+  DCHECK_EQ(RoundUp(thread_addr, kPageSize), thread_addr + GetProtectionOffset());
+  void* page_address = reinterpret_cast<void*>(RoundUp(thread_addr, kPageSize));
+  mprotect(page_address, kPageSize, PROT_READ);
+}
+
+static void UnprotectThread(Thread* thread) {
+  if (!kUseWriteProtectScheme) {
+    return;
+  }
+
+  uintptr_t thread_addr = reinterpret_cast<uintptr_t>(thread);
+  DCHECK_EQ(RoundUp(thread_addr, kPageSize), thread_addr + GetProtectionOffset());
+  void* page_address = reinterpret_cast<void*>(RoundUp(thread_addr, kPageSize));
+  mprotect(page_address, kPageSize, PROT_READ | PROT_WRITE);
+}
+
+void Thread::DeleteThread(Thread* thread) {
+  if (!kUseWriteProtectScheme) {
+    delete thread;
+    return;
+  }
+
+  if (thread == nullptr) {
+    return;
+  }
+
+  UnprotectThread(thread);
+  thread->~Thread();
+
+  // There should be the MemMap* at the bottom.
+  MemMap* mem_map =
+      *reinterpret_cast<MemMap**>(RoundDown(reinterpret_cast<uintptr_t>(thread), kPageSize));
+
+  delete mem_map;
+}
+
 void Thread::InitStringEntryPoints() {
+  // Ensure things are writable. This may be a late initialization of the entrypoints for the main
+  // thread.
+  UnprotectThread(this);
+
   ScopedObjectAccess soa(this);
   QuickEntryPoints* qpoints = &tlsPtr_.quick_entrypoints;
   qpoints->pNewEmptyString = reinterpret_cast<void(*)()>(
@@ -141,6 +226,9 @@ void Thread::InitStringEntryPoints() {
       soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer));
   qpoints->pNewStringFromStringBuilder = reinterpret_cast<void(*)()>(
       soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder));
+
+  // This is a good time to protect things, now that all entrypoints are set.
+  ProtectThread(this);
 }
 
 void Thread::ResetQuickAllocEntryPointsForThread() {
@@ -406,7 +494,7 @@ void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size,
     return;
   }
 
-  Thread* child_thread = new Thread(is_daemon);
+  Thread* child_thread = AllocateThread(is_daemon);
   // Use global JNI ref to hold peer live while child thread starts.
   child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);
   stack_size = FixStackSize(stack_size);
@@ -454,7 +542,7 @@ void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size,
       // Manually delete the global reference since Thread::Init will not have been run.
       env->DeleteGlobalRef(child_thread->tlsPtr_.jpeer);
       child_thread->tlsPtr_.jpeer = nullptr;
-      delete child_thread;
+      DeleteThread(child_thread);
       child_thread = nullptr;
       // TODO: remove from thread group?
       env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
@@ -525,11 +613,11 @@ Thread* Thread::Attach(const char* thread_name, bool as_daemon, jobject thread_group,
       return nullptr;
     } else {
       Runtime::Current()->StartThreadBirth();
-      self = new Thread(as_daemon);
+      self = AllocateThread(as_daemon);
       bool init_success = self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
       Runtime::Current()->EndThreadBirth();
      if (!init_success) {
-        delete self;
+        DeleteThread(self);
         return nullptr;
       }
     }
diff --git a/runtime/thread.h b/runtime/thread.h
index 0e71c08..eb1809d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -531,7 +531,8 @@ class Thread {
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
-    size_t base = OFFSETOF_MEMBER(Thread, tlsPtr_);
+    size_t base = /* OFFSETOF_MEMBER(Thread, tlsPtr_); */
+        pointer_size == 8u ? 160 : 144;
     size_t scale;
     size_t shrink;
     if (pointer_size == sizeof(void*)) {
@@ -951,6 +952,8 @@ class Thread {
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
                            Locks::thread_suspend_count_lock_);
 
   void Destroy();
+  static Thread* AllocateThread(bool is_daemon);
+  static void DeleteThread(Thread* thread);
 
   void CreatePeer(const char* name, bool as_daemon, jobject thread_group);
@@ -1132,19 +1135,31 @@ class Thread {
     RuntimeStats stats;
   } tls64_;
 
-  struct PACKED(4) tls_ptr_sized_values {
-      tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
-      managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
-      self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
-      stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
-      top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
-      instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
-      stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
-      name(nullptr), pthread_self(0),
-      last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
-      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
-      thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
-      nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr) {
+  // Guards the 'interrupted_' and 'wait_monitor_' members.
+  Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // Condition variable waited upon during a wait.
+  ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
+  // Pointer to the monitor lock we're currently waiting on or null if not waiting.
+  Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
+
+  // Thread "interrupted" status; stays raised until queried or thrown.
+  bool interrupted_ GUARDED_BY(wait_mutex_);
+
+  struct PACKED(sizeof(void*)) tls_ptr_sized_values {
+      tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
+      managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
+      opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
+      stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
+      top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
+      instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
+      stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
+      name(nullptr), pthread_self(0),
+      last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
+      nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
+      self(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
 
@@ -1172,11 +1187,6 @@ class Thread {
     // created thread.
     JNIEnvExt* tmp_jni_env;
 
-    // Initialized to "this". On certain architectures (such as x86) reading off of Thread::Current
-    // is easy but getting the address of Thread::Current is hard. This field can be read off of
-    // Thread::Current to give the address.
-    Thread* self;
-
     // Our managed peer (an instance of java.lang.Thread). The jobject version is used during thread
     // start up, until the thread is registered and the local opeer_ is used.
     mirror::Object* opeer;
@@ -1238,12 +1248,6 @@ class Thread {
     // Locks::thread_suspend_count_lock_.
     Closure* checkpoint_functions[kMaxCheckpoints];
 
-    // Entrypoint function pointers.
-    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
-    InterpreterEntryPoints interpreter_entrypoints;
-    JniEntryPoints jni_entrypoints;
-    QuickEntryPoints quick_entrypoints;
-
     // Thread-local allocation pointer.
     uint8_t* thread_local_start;
     uint8_t* thread_local_pos;
@@ -1268,18 +1272,18 @@ class Thread {
 
     // Current method verifier, used for root marking.
     verifier::MethodVerifier* method_verifier;
-  } tlsPtr_;
-
-  // Guards the 'interrupted_' and 'wait_monitor_' members.
-  Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
-  // Condition variable waited upon during a wait.
-  ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
-  // Pointer to the monitor lock we're currently waiting on or null if not waiting.
-  Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
+
+    // Entrypoint function pointers.
+    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
+    QuickEntryPoints quick_entrypoints;
+    JniEntryPoints jni_entrypoints;
+    InterpreterEntryPoints interpreter_entrypoints;
 
-  // Thread "interrupted" status; stays raised until queried or thrown.
-  bool interrupted_ GUARDED_BY(wait_mutex_);
+    // Initialized to "this". On certain architectures (such as x86) reading off of Thread::Current
+    // is easy but getting the address of Thread::Current is hard. This field can be read off of
+    // Thread::Current to give the address.
+    Thread* self;
+  } tlsPtr_;
 
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b697b43..7d49112 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1148,7 +1148,7 @@ void ThreadList::Unregister(Thread* self) {
     }
     // We failed to remove the thread due to a suspend request, loop and try again.
   }
-  delete self;
+  Thread::DeleteThread(self);
 
   // Release the thread ID after the thread is finished and deleted to avoid cases where we can
   // temporarily have multiple threads with the same thread id. When this occurs, it causes
--
cgit v1.1
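
Appendix (not part of the patch): the sketch below reproduces, in isolation, the two-page
trick that AllocateThread/ProtectThread/DeleteThread implement above. An object is
placement-new'ed so that its rarely written tail begins exactly at a page boundary; after
initialization that page is remapped read-only. Every name here (TlsBlock, Allocate,
Protect, Delete, the fixed 4 KiB kPageSize) is invented for illustration; the real patch
instead derives the boundary from QUICK_ENTRYPOINT_OFFSET(..., pInstanceofNonTrivial)
and stashes the owning MemMap* at the bottom of the mapping.

#include <sys/mman.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <new>

namespace {

constexpr size_t kPageSize = 4096;  // Assumed; query sysconf(_SC_PAGESIZE) in real code.

struct TlsBlock {
  // Frequently written state: stays on the writable first page.
  uint64_t mutable_state[8];
  // Rarely written table: fills the second page and gets write-protected.
  void* entrypoints[kPageSize / sizeof(void*)];
};

TlsBlock* Allocate() {
  // Map two anonymous pages; page 0 stays writable, page 1 becomes read-only.
  void* mem = mmap(nullptr, 2 * kPageSize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(mem != MAP_FAILED);
  uint8_t* second_page = static_cast<uint8_t*>(mem) + kPageSize;
  // Place the object so that `entrypoints` begins exactly at the page boundary,
  // mirroring how AllocateThread() backs the Thread off the second page.
  return new (second_page - offsetof(TlsBlock, entrypoints)) TlsBlock();
}

void Protect(TlsBlock* t) {
  // The protected region is exactly the page holding `entrypoints`.
  mprotect(t->entrypoints, kPageSize, PROT_READ);
}

void Unprotect(TlsBlock* t) {
  mprotect(t->entrypoints, kPageSize, PROT_READ | PROT_WRITE);
}

void Delete(TlsBlock* t) {
  // Like DeleteThread(): make the page writable again, destroy, unmap both pages.
  Unprotect(t);
  uint8_t* base = reinterpret_cast<uint8_t*>(t->entrypoints) - kPageSize;
  t->~TlsBlock();
  munmap(base, 2 * kPageSize);
}

}  // namespace

int main() {
  TlsBlock* t = Allocate();
  t->entrypoints[0] = t;          // Initialize while still writable.
  Protect(t);                     // Corresponds to ProtectThread().
  t->mutable_state[0] = 42;       // Fine: the first page is still writable.
  // t->entrypoints[0] = nullptr; // Would SIGSEGV now: the page is read-only.
  Delete(t);
  return 0;
}

The thread.h reordering in the patch serves the same purpose as the struct layout in this
sketch: the entrypoint tables and `self` move to the end of tls_ptr_sized_values so they
land on the protected page, while fast-path fields such as thread_local_pos stay on the
writable first page, which is why THREAD_LOCAL_POS_OFFSET shrinks to
THREAD_CARD_TABLE_OFFSET + 31 * __SIZEOF_POINTER__.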