-rw-r--r--  runtime/atomic.h                                     | 549
-rw-r--r--  runtime/base/mutex.cc                                |  10
-rw-r--r--  runtime/interpreter/interpreter_goto_table_impl.cc   |   4
-rw-r--r--  runtime/interpreter/interpreter_switch_impl.cc       |   4
-rw-r--r--  runtime/mirror/object-inl.h                          |  38
-rw-r--r--  runtime/mirror/object.h                              |   7
-rw-r--r--  runtime/monitor.cc                                   |   2
-rw-r--r--  runtime/native/sun_misc_Unsafe.cc                    |   6
8 files changed, 422 insertions, 198 deletions
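For orientation before the per-file hunks: where ART_HAVE_STDATOMIC is set, the renamed QuasiAtomic fences become thin wrappers over std::atomic_thread_fence. The condensed sketch below is an editor's summary of that branch of runtime/atomic.h, not code added on top of the change; the aarch64 special case is noted in a comment.

  #include <atomic>

  // Condensed from the new ART_HAVE_STDATOMIC branch of QuasiAtomic:
  static void ThreadFenceAcquire() { std::atomic_thread_fence(std::memory_order_acquire); }
  static void ThreadFenceRelease() { std::atomic_thread_fence(std::memory_order_release); }
  // On aarch64 the change emits "dmb ishst" here instead, because a constructor
  // fence only has to order stores and can thus be weaker than release.
  static void ThreadFenceForConstructor() { std::atomic_thread_fence(std::memory_order_release); }
  static void ThreadFenceSequentiallyConsistent() { std::atomic_thread_fence(std::memory_order_seq_cst); }
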
diff --git a/runtime/atomic.h b/runtime/atomic.h index 9262db6..dda1801 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -35,161 +35,14 @@ namespace art { class Mutex; -#if ART_HAVE_STDATOMIC -template<typename T> -class Atomic : public std::atomic<T> { - public: - COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>), - std_atomic_size_differs_from_that_of_underlying_type); - COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>), - std_atomic_alignment_differs_from_that_of_underlying_type); - - Atomic<T>() : std::atomic<T>() { } - - explicit Atomic<T>(T value) : std::atomic<T>(value) { } - - // Load from memory without ordering or synchronization constraints. - T LoadRelaxed() const { - return this->load(std::memory_order_relaxed); - } - - // Load from memory with a total ordering. - T LoadSequentiallyConsistent() const { - return this->load(std::memory_order_seq_cst); - } - - // Store to memory without ordering or synchronization constraints. - void StoreRelaxed(T desired) { - this->store(desired, std::memory_order_relaxed); - } - - // Store to memory with a total ordering. - void StoreSequentiallyConsistent(T desired) { - this->store(desired, std::memory_order_seq_cst); - } - - // Atomically replace the value with desired value if it matches the expected value. Doesn't - // imply ordering or synchronization constraints. - bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { - return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed); - } - - // Atomically replace the value with desired value if it matches the expected value. Prior writes - // made to other memory locations by the thread that did the release become visible in this - // thread. - bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { - return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire); - } - - // Atomically replace the value with desired value if it matches the expected value. prior writes - // to other memory locations become visible to the threads that do a consume or an acquire on the - // same location. - bool CompareExchangeWeakRelease(T expected_value, T desired_value) { - return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release); - } - - T FetchAndAddSequentiallyConsistent(const T value) { - return this->fetch_add(value, std::memory_order_seq_cst); // Return old_value. - } - - T FetchAndSubSequentiallyConsistent(const T value) { - return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value. - } - - volatile T* Address() { - return reinterpret_cast<T*>(this); - } - - static T MaxValue() { - return std::numeric_limits<T>::max(); - } -}; -#else -template<typename T> -class Atomic { - public: - Atomic<T>() : value_(0) { } - - explicit Atomic<T>(T value) : value_(value) { } - - // Load from memory without ordering or synchronization constraints. - T LoadRelaxed() const { - return value_; - } - - // Load from memory with a total ordering. - T LoadSequentiallyConsistent() const; - - // Store to memory without ordering or synchronization constraints. - void StoreRelaxed(T desired) { - value_ = desired; - } - - // Store to memory with a total ordering. - void StoreSequentiallyConsistent(T desired); - - // Atomically replace the value with desired value if it matches the expected value. Doesn't - // imply ordering or synchronization constraints. - bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { - // TODO: make this relaxed. 
- return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); - } - - // Atomically replace the value with desired value if it matches the expected value. Prior writes - // made to other memory locations by the thread that did the release become visible in this - // thread. - bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { - // TODO: make this acquire. - return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); - } - - // Atomically replace the value with desired value if it matches the expected value. prior writes - // to other memory locations become visible to the threads that do a consume or an acquire on the - // same location. - bool CompareExchangeWeakRelease(T expected_value, T desired_value) { - // TODO: make this release. - return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); - } - - volatile T* Address() { - return &value_; - } - - T FetchAndAddSequentiallyConsistent(const T value) { - return __sync_fetch_and_add(&value_, value); // Return old_value. - } - - T FetchAndSubSequentiallyConsistent(const T value) { - return __sync_fetch_and_sub(&value_, value); // Return old value. - } - - T operator++() { // Prefix operator. - return __sync_add_and_fetch(&value_, 1); // Return new value. - } - - T operator++(int) { // Postfix operator. - return __sync_fetch_and_add(&value_, 1); // Return old value. - } - - T operator--() { // Prefix operator. - return __sync_sub_and_fetch(&value_, 1); // Return new value. - } - - T operator--(int) { // Postfix operator. - return __sync_fetch_and_sub(&value_, 1); // Return old value. - } - - static T MaxValue() { - return std::numeric_limits<T>::max(); - } - - private: - T value_; -}; -#endif - -typedef Atomic<int32_t> AtomicInteger; - +// QuasiAtomic encapsulates two separate facilities that we are +// trying to move away from: "quasiatomic" 64 bit operations +// and custom memory fences. For the time being, they remain +// exposed. Clients should be converted to use either class Atomic +// below whenever possible, and should eventually use C++11 atomics. +// The two facilities that do not have a good C++11 analog are +// ThreadFenceForConstructor and Atomic::*JavaData. +// // NOTE: Two "quasiatomic" operations on the exact same memory address // are guaranteed to operate atomically with respect to each other, // but no guarantees are made about quasiatomic operations mixed with @@ -286,6 +139,11 @@ class QuasiAtomic { // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value" // and return true. Otherwise, don't swap, and return false. + // This is fully ordered, i.e. it has C++11 memory_order_seq_cst + // semantics (assuming all other accesses use a mutex if this one does). + // This has "strong" semantics; if it fails then it is guaranteed that + // at some point during the execution of Cas64, *addr was not equal to + // old_value. 
static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) { if (!kNeedSwapMutexes) { return __sync_bool_compare_and_swap(addr, old_value, new_value); @@ -299,9 +157,37 @@ class QuasiAtomic { return kNeedSwapMutexes; } - static void MembarLoadStore() { + #if ART_HAVE_STDATOMIC + + static void ThreadFenceAcquire () { + std::atomic_thread_fence(std::memory_order_acquire); + } + + static void ThreadFenceRelease () { + std::atomic_thread_fence(std::memory_order_release); + } + + static void ThreadFenceForConstructor() { + #if defined(__aarch64__) + __asm__ __volatile__("dmb ishst" : : : "memory"); + #else + std::atomic_thread_fence(std::memory_order_release); + #endif + } + + static void ThreadFenceSequentiallyConsistent() { + std::atomic_thread_fence(std::memory_order_seq_cst); + } + + #else + + static void ThreadFenceAcquire() { #if defined(__arm__) || defined(__aarch64__) __asm__ __volatile__("dmb ish" : : : "memory"); + // Could possibly use dmb ishld on aarch64 + // But currently we also use this on volatile loads + // to enforce store atomicity. Ishld is + // insufficient for that purpose. #elif defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("" : : : "memory"); #elif defined(__mips__) @@ -311,9 +197,10 @@ class QuasiAtomic { #endif } - static void MembarLoadLoad() { + static void ThreadFenceRelease() { #if defined(__arm__) || defined(__aarch64__) __asm__ __volatile__("dmb ish" : : : "memory"); + // ishst doesn't order load followed by store. #elif defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("" : : : "memory"); #elif defined(__mips__) @@ -323,7 +210,11 @@ class QuasiAtomic { #endif } - static void MembarStoreStore() { + // Fence at the end of a constructor with final fields + // or allocation. We believe this + // only has to order stores, and can thus be weaker than + // release on aarch64. + static void ThreadFenceForConstructor() { #if defined(__arm__) || defined(__aarch64__) __asm__ __volatile__("dmb ishst" : : : "memory"); #elif defined(__i386__) || defined(__x86_64__) @@ -335,7 +226,7 @@ class QuasiAtomic { #endif } - static void MembarStoreLoad() { + static void ThreadFenceSequentiallyConsistent() { #if defined(__arm__) || defined(__aarch64__) __asm__ __volatile__("dmb ish" : : : "memory"); #elif defined(__i386__) || defined(__x86_64__) @@ -346,6 +237,7 @@ class QuasiAtomic { #error Unexpected architecture #endif } + #endif private: static Mutex* GetSwapMutex(const volatile int64_t* addr); @@ -360,19 +252,350 @@ class QuasiAtomic { DISALLOW_COPY_AND_ASSIGN(QuasiAtomic); }; +#if ART_HAVE_STDATOMIC +template<typename T> +class Atomic : public std::atomic<T> { + public: + Atomic<T>() : std::atomic<T>() { } + + explicit Atomic<T>(T value) : std::atomic<T>(value) { } + + // Load from memory without ordering or synchronization constraints. + T LoadRelaxed() const { + return this->load(std::memory_order_relaxed); + } + + // Word tearing allowed, but may race. + // TODO: Optimize? + // There has been some discussion of eventually disallowing word + // tearing for Java data loads. + T LoadJavaData() const { + return this->load(std::memory_order_relaxed); + } + + // Load from memory with a total ordering. + // Corresponds exactly to a Java volatile load. + T LoadSequentiallyConsistent() const { + return this->load(std::memory_order_seq_cst); + } + + // Store to memory without ordering or synchronization constraints. 
+ void StoreRelaxed(T desired) { + this->store(desired, std::memory_order_relaxed); + } + + // Word tearing allowed, but may race. + void StoreJavaData(T desired) { + this->store(desired, std::memory_order_relaxed); + } + + // Store to memory with release ordering. + void StoreRelease(T desired) { + this->store(desired, std::memory_order_release); + } + + // Store to memory with a total ordering. + void StoreSequentiallyConsistent(T desired) { + this->store(desired, std::memory_order_seq_cst); + } + + // Atomically replace the value with desired value if it matches the expected value. + // Participates in total ordering of atomic operations. + bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) { + return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst); + } + + // The same, except it may fail spuriously. + bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst); + } + + // Atomically replace the value with desired value if it matches the expected value. Doesn't + // imply ordering or synchronization constraints. + bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) { + return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed); + } + + // The same, except it may fail spuriously. + bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed); + } + + // Atomically replace the value with desired value if it matches the expected value. Prior writes + // made to other memory locations by the thread that did the release become visible in this + // thread. + bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire); + } + + // Atomically replace the value with desired value if it matches the expected value. prior writes + // to other memory locations become visible to the threads that do a consume or an acquire on the + // same location. + bool CompareExchangeWeakRelease(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release); + } + + T FetchAndAddSequentiallyConsistent(const T value) { + return this->fetch_add(value, std::memory_order_seq_cst); // Return old_value. + } + + T FetchAndSubSequentiallyConsistent(const T value) { + return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value. + } + + volatile T* Address() { + return reinterpret_cast<T*>(this); + } + + static T MaxValue() { + return std::numeric_limits<T>::max(); + } + +}; + +#else + +template<typename T> class Atomic; + +// Helper class for Atomic to deal separately with size 8 and small +// objects. Should not be used directly. 
+ +template<int SZ, class T> struct AtomicHelper { + friend class Atomic<T>; + +private: + COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg); + + static T LoadRelaxed(const volatile T* loc) { + // sizeof(T) <= 4 + return *loc; + } + + static void StoreRelaxed(volatile T* loc, T desired) { + // sizeof(T) <= 4 + *loc = desired; + } + + static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc, + T expected_value, T desired_value) { + // sizeof(T) <= 4 + return __sync_bool_compare_and_swap(loc, expected_value, desired_value); + } +}; + +template<class T> struct AtomicHelper<8, T> { + friend class Atomic<T>; + +private: + COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg); + + static T LoadRelaxed(const volatile T* loc) { + // sizeof(T) == 8 + volatile const int64_t* loc_ptr = + reinterpret_cast<volatile const int64_t*>(loc); + return reinterpret_cast<T>(QuasiAtomic::Read64(loc_ptr)); + } + + static void StoreRelaxed(volatile T* loc, T desired) { + // sizeof(T) == 8 + volatile int64_t* loc_ptr = + reinterpret_cast<volatile int64_t*>(loc); + QuasiAtomic::Write64(loc_ptr, + reinterpret_cast<int64_t>(desired)); + } + + + static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc, + T expected_value, T desired_value) { + // sizeof(T) == 8 + volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc); + return QuasiAtomic::Cas64( + reinterpret_cast<int64_t>(expected_value), + reinterpret_cast<int64_t>(desired_value), loc_ptr); + } +}; + +template<typename T> +class Atomic { + + private: + COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg); + + public: + Atomic<T>() : value_(0) { } + + explicit Atomic<T>(T value) : value_(value) { } + + // Load from memory without ordering or synchronization constraints. + T LoadRelaxed() const { + return AtomicHelper<sizeof(T),T>::LoadRelaxed(&value_); + } + + // Word tearing allowed, but may race. + T LoadJavaData() const { + return value_; + } + + // Load from memory with a total ordering. + T LoadSequentiallyConsistent() const; + + // Store to memory without ordering or synchronization constraints. + void StoreRelaxed(T desired) { + AtomicHelper<sizeof(T),T>::StoreRelaxed(&value_,desired); + } + + // Word tearing allowed, but may race. + void StoreJavaData(T desired) { + value_ = desired; + } + + // Store to memory with release ordering. + void StoreRelease(T desired); + + // Store to memory with a total ordering. + void StoreSequentiallyConsistent(T desired); + + // Atomically replace the value with desired value if it matches the expected value. + // Participates in total ordering of atomic operations. + bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) { + return AtomicHelper<sizeof(T),T>:: + CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value); + } + + // The same, but may fail spuriously. + bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) { + // TODO: Take advantage of the fact that it may fail spuriously. + return AtomicHelper<sizeof(T),T>:: + CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value); + } + + // Atomically replace the value with desired value if it matches the expected value. Doesn't + // imply ordering or synchronization constraints. + bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) { + // TODO: make this relaxed. 
+ return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value); + } + + // The same, but may fail spuriously. + bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { + // TODO: Take advantage of the fact that it may fail spuriously. + // TODO: make this relaxed. + return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value); + } + + // Atomically replace the value with desired value if it matches the expected value. Prior accesses + // made to other memory locations by the thread that did the release become visible in this + // thread. + bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { + // TODO: make this acquire. + return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value); + } + + // Atomically replace the value with desired value if it matches the expected value. Prior accesses + // to other memory locations become visible to the threads that do a consume or an acquire on the + // same location. + bool CompareExchangeWeakRelease(T expected_value, T desired_value) { + // TODO: make this release. + return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value); + } + + volatile T* Address() { + return &value_; + } + + T FetchAndAddSequentiallyConsistent(const T value) { + if (sizeof(T) <= 4) { + return __sync_fetch_and_add(&value_, value); // Return old value. + } else { + T expected; + do { + expected = LoadRelaxed(); + } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value)); + return expected; + } + } + + T FetchAndSubSequentiallyConsistent(const T value) { + if (sizeof(T) <= 4) { + return __sync_fetch_and_sub(&value_, value); // Return old value. + } else { + return FetchAndAddSequentiallyConsistent(-value); + } + } + + T operator++() { // Prefix operator. + if (sizeof(T) <= 4) { + return __sync_add_and_fetch(&value_, 1); // Return new value. + } else { + return FetchAndAddSequentiallyConsistent(1) + 1; + } + } + + T operator++(int) { // Postfix operator. + return FetchAndAddSequentiallyConsistent(1); + } + + T operator--() { // Prefix operator. + if (sizeof(T) <= 4) { + return __sync_sub_and_fetch(&value_, 1); // Return new value. + } else { + return FetchAndSubSequentiallyConsistent(1) - 1; + } + } + + T operator--(int) { // Postfix operator. + return FetchAndSubSequentiallyConsistent(1); + } + + static T MaxValue() { + return std::numeric_limits<T>::max(); + } + + + private: + volatile T value_; +}; +#endif + +typedef Atomic<int32_t> AtomicInteger; + +COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size); +COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t), + atomic_int_alignment_differs_from_that_of_underlying_type); +COMPILE_ASSERT(sizeof(Atomic<long long>) == sizeof(long long), weird_atomic_long_long_size); +COMPILE_ASSERT(alignof(Atomic<long long>) == alignof(long long), + atomic_long_long_alignment_differs_from_that_of_underlying_type); + + #if !ART_HAVE_STDATOMIC template<typename T> inline T Atomic<T>::LoadSequentiallyConsistent() const { T result = value_; - QuasiAtomic::MembarLoadLoad(); + if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) { + QuasiAtomic::ThreadFenceAcquire(); + // We optimistically assume this suffices for store atomicity. + // On ARMv8 we strengthen ThreadFenceAcquire to make that true. 
+ } return result; } template<typename T> +inline void Atomic<T>::StoreRelease(T desired) { + if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) { + QuasiAtomic::ThreadFenceRelease(); + } + StoreRelaxed(desired); +} + +template<typename T> inline void Atomic<T>::StoreSequentiallyConsistent(T desired) { - QuasiAtomic::MembarStoreStore(); - value_ = desired; - QuasiAtomic::MembarStoreLoad(); + if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) { + QuasiAtomic::ThreadFenceRelease(); + } + StoreRelaxed(desired); + if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) { + QuasiAtomic::ThreadFenceSequentiallyConsistent(); + } } #endif diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 11698e2..aeece74 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -331,7 +331,10 @@ void Mutex::ExclusiveLock(Thread* self) { num_contenders_--; } } while (!done); - QuasiAtomic::MembarStoreLoad(); + // We assert that no memory fence is needed here, since + // __sync_bool_compare_and_swap includes it. + // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation + // that exposes the ordering semantics. DCHECK_EQ(state_, 1); exclusive_owner_ = SafeGetTid(self); #else @@ -364,7 +367,7 @@ bool Mutex::ExclusiveTryLock(Thread* self) { return false; } } while (!done); - QuasiAtomic::MembarStoreLoad(); + // We again assert no memory fence is needed. DCHECK_EQ(state_, 1); exclusive_owner_ = SafeGetTid(self); #else @@ -403,7 +406,7 @@ void Mutex::ExclusiveUnlock(Thread* self) { do { int32_t cur_state = state_; if (LIKELY(cur_state == 1)) { - QuasiAtomic::MembarStoreStore(); + // The __sync_bool_compare_and_swap enforces the necessary memory ordering. // We're no longer the owner. exclusive_owner_ = 0; // Change state to 0. @@ -426,7 +429,6 @@ void Mutex::ExclusiveUnlock(Thread* self) { } } } while (!done); - QuasiAtomic::MembarStoreLoad(); #else CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_)); #endif diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 99153c8..623d9c3 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -247,7 +247,7 @@ JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* // If access checks are required then the dex-to-dex compiler and analysis of // whether the class has final fields hasn't been performed. Conservatively // perform the memory barrier now. - QuasiAtomic::MembarStoreLoad(); + QuasiAtomic::ThreadFenceForConstructor(); } if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); @@ -266,7 +266,7 @@ JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* HANDLE_INSTRUCTION_END(); HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) { - QuasiAtomic::MembarStoreLoad(); + QuasiAtomic::ThreadFenceForConstructor(); JValue result; if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index 3c7880c..d592a53 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -175,7 +175,7 @@ JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem // If access checks are required then the dex-to-dex compiler and analysis of // whether the class has final fields hasn't been performed. Conservatively // perform the memory barrier now. 
- QuasiAtomic::MembarStoreLoad(); + QuasiAtomic::ThreadFenceForConstructor(); } if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); @@ -191,7 +191,7 @@ JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem return result; } case Instruction::RETURN_VOID_BARRIER: { - QuasiAtomic::MembarStoreLoad(); + QuasiAtomic::ThreadFenceForConstructor(); JValue result; if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 11735fb..567ce3e 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -405,11 +405,9 @@ inline int32_t Object::GetField32(MemberOffset field_offset) { const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value(); const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr); if (UNLIKELY(kIsVolatile)) { - int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr))); - QuasiAtomic::MembarLoadLoad(); // Ensure volatile loads don't re-order. - return result; + return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadSequentiallyConsistent(); } else { - return *word_addr; + return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadJavaData(); } } @@ -435,11 +433,9 @@ inline void Object::SetField32(MemberOffset field_offset, int32_t new_value) { byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr); if (kIsVolatile) { - QuasiAtomic::MembarStoreStore(); // Ensure this store occurs after others in the queue. - *word_addr = new_value; - QuasiAtomic::MembarStoreLoad(); // Ensure this store occurs before any volatile loads. + reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreSequentiallyConsistent(new_value); } else { - *word_addr = new_value; + reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreJavaData(new_value); } } @@ -461,6 +457,7 @@ inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int } byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr); + return __sync_bool_compare_and_swap(addr, old_value, new_value); } @@ -472,11 +469,9 @@ inline int64_t Object::GetField64(MemberOffset field_offset) { const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value(); const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr); if (kIsVolatile) { - int64_t result = QuasiAtomic::Read64(addr); - QuasiAtomic::MembarLoadLoad(); // Ensure volatile loads don't re-order. - return result; + return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadSequentiallyConsistent(); } else { - return *addr; + return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadJavaData(); } } @@ -502,15 +497,9 @@ inline void Object::SetField64(MemberOffset field_offset, int64_t new_value) { byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); int64_t* addr = reinterpret_cast<int64_t*>(raw_addr); if (kIsVolatile) { - QuasiAtomic::MembarStoreStore(); // Ensure this store occurs after others in the queue. - QuasiAtomic::Write64(addr, new_value); - if (!QuasiAtomic::LongAtomicsUseMutexes()) { - QuasiAtomic::MembarStoreLoad(); // Ensure this store occurs before any volatile loads. - } else { - // Fence from from mutex is enough. 
- } + reinterpret_cast<Atomic<int64_t>*>(addr)->StoreSequentiallyConsistent(new_value); } else { - *addr = new_value; + reinterpret_cast<Atomic<int64_t>*>(addr)->StoreJavaData(new_value); } } @@ -546,7 +535,8 @@ inline T* Object::GetFieldObject(MemberOffset field_offset) { HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr); T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr); if (kIsVolatile) { - QuasiAtomic::MembarLoadLoad(); // Ensure loads don't re-order. + // TODO: Refactor to use a SequentiallyConsistent load instead. + QuasiAtomic::ThreadFenceAcquire(); // Ensure visibility of operations preceding store. } if (kVerifyFlags & kVerifyReads) { VerifyObject(result); @@ -584,9 +574,11 @@ inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr); if (kIsVolatile) { - QuasiAtomic::MembarStoreStore(); // Ensure this store occurs after others in the queue. + // TODO: Refactor to use a SequentiallyConsistent store instead. + QuasiAtomic::ThreadFenceRelease(); // Ensure that prior accesses are visible before store. objref_addr->Assign(new_value); - QuasiAtomic::MembarStoreLoad(); // Ensure this store occurs before any loads. + QuasiAtomic::ThreadFenceSequentiallyConsistent(); + // Ensure this store occurs before any volatile loads. } else { objref_addr->Assign(new_value); } diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index f0c5a73..c082443 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -103,6 +103,13 @@ class MANAGED LOCKABLE Object { // avoids the barriers. LockWord GetLockWord(bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void SetLockWord(LockWord new_val, bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // All Cas operations defined here have C++11 memory_order_seq_cst ordering + // semantics: Preceding memory operations become visible to other threads + // before the CAS, and subsequent operations become visible after the CAS. + // The Cas operations defined here do not fail spuriously, i.e. they + // have C++11 "strong" semantics. + // TODO: In most, possibly all, cases, these assumptions are too strong. + // Confirm and weaken the implementation. bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); uint32_t GetLockOwnerThreadId(); diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 58e6dd4..f73ef1e 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -694,7 +694,7 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { case LockWord::kUnlocked: { LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0)); if (h_obj->CasLockWord(lock_word, thin_locked)) { - QuasiAtomic::MembarLoadLoad(); + // CasLockWord enforces more than the acquire ordering we need here. return h_obj.Get(); // Success! } continue; // Go again. 
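Between the mirror/Object and Monitor hunks, the new object.h comment is worth unpacking: the runtime currently assumes its CAS operations are both sequentially consistent and strong (no spurious failure), which is also why the MembarLoadLoad() after CasLockWord could be dropped in monitor.cc above. A minimal std::atomic illustration of the strong/weak distinction follows; it is an editor's sketch with hypothetical names (lock_word, AcquireThin, AtomicAdd), not code from this change.

  #include <atomic>
  #include <cstdint>

  std::atomic<int32_t> lock_word{0};

  // Strong CAS: a false return guarantees the value really differed from the
  // expected value at some point, so no retry loop is required. On success,
  // seq_cst ordering already subsumes the acquire ordering a lock needs.
  bool AcquireThin(int32_t unlocked, int32_t thin_locked) {
    return lock_word.compare_exchange_strong(unlocked, thin_locked,
                                             std::memory_order_seq_cst);
  }

  // Weak CAS: may fail spuriously even when the value matches, so it only
  // makes sense inside a retry loop (expected is refreshed on each failure).
  void AtomicAdd(std::atomic<int32_t>* v, int32_t delta) {
    int32_t expected = v->load(std::memory_order_relaxed);
    while (!v->compare_exchange_weak(expected, expected + delta,
                                     std::memory_order_seq_cst)) {
    }
  }
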
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 764db5e..d23cfff 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -83,7 +83,7 @@ static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong of
                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField32<false>(MemberOffset(offset), newValue);
 }
@@ -119,7 +119,7 @@ static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong o
                                   jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField64<false>(MemberOffset(offset), newValue);
 }
@@ -161,7 +161,7 @@ static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetFieldObject<false>(MemberOffset(offset), newValue);
 }
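Taken together, the object-inl.h and sun_misc_Unsafe.cc hunks establish a simple mapping from Java access kinds to the new Atomic<T> / QuasiAtomic API: plain field accesses use LoadJavaData/StoreJavaData, Java volatile accesses use the SequentiallyConsistent variants, and Unsafe.putOrdered* becomes a release fence followed by a plain store. The sketch below restates that mapping in isolation; it is an editor's illustration with a hypothetical free-standing field, whereas real accesses go through the mirror::Object accessors shown above.

  #include "atomic.h"  // art::Atomic, art::QuasiAtomic

  using art::Atomic;
  using art::QuasiAtomic;

  Atomic<int32_t> field;  // stands in for a 32-bit field word inside an object

  int32_t ReadPlain() { return field.LoadJavaData(); }                     // non-volatile read: tearing allowed, may race
  int32_t ReadVolatile() { return field.LoadSequentiallyConsistent(); }    // Java volatile read
  void WritePlain(int32_t v) { field.StoreJavaData(v); }                   // non-volatile write
  void WriteVolatile(int32_t v) { field.StoreSequentiallyConsistent(v); }  // Java volatile write

  // Unsafe.putOrderedInt-style lazy store: release fence, then a plain store.
  void WriteOrdered(int32_t v) {
    QuasiAtomic::ThreadFenceRelease();
    field.StoreJavaData(v);
  }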