author     Ian Rogers <irogers@google.com>   2013-11-19 18:00:50 -0800
committer  Ian Rogers <irogers@google.com>   2013-12-20 08:01:57 -0800
commit     b122a4bbed34ab22b4c1541ee25e5cf22f12a926 (patch)
tree       624f16271f4481a8fd5aa2f607385f490dc7b3ae /runtime
parent     e40687d053b89c495b6fbeb7a766b01c9c7e039c (diff)
Tidy up memory barriers.
Change-Id: I937ea93e6df1835ecfe2d4bb7d84c24fe7fc097b
Diffstat (limited to 'runtime')
26 files changed, 271 insertions, 256 deletions
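
The patch below drops the libcutils android_atomic_*/ANDROID_MEMBAR_* helpers in favor of GCC __sync builtins plus explicit per-architecture barrier wrappers on QuasiAtomic (MembarLoadLoad, MembarStoreStore, MembarStoreLoad, and friends). As a reading aid, here is a minimal, self-contained sketch of that wrapper-plus-builtin pattern; StoreLoadFence and Counter are illustrative names, not the classes defined in the patch.

// Sketch only: a per-architecture fence wrapper plus lock-free integer
// operations built on GCC __sync builtins, mirroring the pattern this
// patch adopts. Names are illustrative, not ART's.
#include <stdint.h>

static inline void StoreLoadFence() {
#if defined(__arm__)
  __asm__ __volatile__("dmb ish" : : : "memory");   // full data memory barrier on ARMv7
#elif defined(__i386__) || defined(__x86_64__)
  __asm__ __volatile__("mfence" : : : "memory");    // x86 only needs an instruction for StoreLoad
#else
  __sync_synchronize();                             // conservative fallback: full barrier
#endif
}

class Counter {
 public:
  explicit Counter(int32_t value) : value_(value) {}

  int32_t FetchAndAdd(int32_t delta) {
    return __sync_fetch_and_add(&value_, delta);    // returns the old value
  }

  bool CompareAndSwap(int32_t expected, int32_t desired) {
    return __sync_bool_compare_and_swap(&value_, expected, desired);
  }

 private:
  volatile int32_t value_;
};
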
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 61be14b..34de93f 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -359,6 +359,7 @@ retry_lock: @ unlocked case - r2 holds thread id with count of 0 strex r3, r2, [r0, #LOCK_WORD_OFFSET] cbnz r3, strex_fail @ store failed, retry + dmb ish @ full (LoadLoad) memory barrier bx lr strex_fail: b retry_lock @ unlikely forward branch, need to reload and recheck r1/r2 @@ -402,6 +403,7 @@ ENTRY art_quick_unlock_object bpl recursive_thin_unlock @ transition to unlocked, r3 holds 0 str r3, [r0, #LOCK_WORD_OFFSET] + dmb ish @ full (StoreLoad) memory barrier bx lr recursive_thin_unlock: sub r1, r1, #65536 diff --git a/runtime/atomic.cc b/runtime/atomic.cc index 47cee6a..bac0a99 100644 --- a/runtime/atomic.cc +++ b/runtime/atomic.cc @@ -15,135 +15,52 @@ */ #include "atomic.h" - -#define NEED_SWAP_MUTEXES !defined(__arm__) && !defined(__i386__) - -#if NEED_SWAP_MUTEXES -#include <vector> #include "base/mutex.h" #include "base/stl_util.h" -#include "base/stringprintf.h" #include "thread-inl.h" -#endif namespace art { -#if NEED_SWAP_MUTEXES -// We stripe across a bunch of different mutexes to reduce contention. -static const size_t kSwapMutexCount = 32; -static std::vector<Mutex*>* gSwapMutexes; +std::vector<Mutex*>* QuasiAtomic::gSwapMutexes = nullptr; -static Mutex& GetSwapMutex(const volatile int64_t* addr) { - return *(*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount]; +Mutex* QuasiAtomic::GetSwapMutex(const volatile int64_t* addr) { + return (*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount]; } -#endif void QuasiAtomic::Startup() { -#if NEED_SWAP_MUTEXES - gSwapMutexes = new std::vector<Mutex*>; - for (size_t i = 0; i < kSwapMutexCount; ++i) { - gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe")); + if (kNeedSwapMutexes) { + gSwapMutexes = new std::vector<Mutex*>; + for (size_t i = 0; i < kSwapMutexCount; ++i) { + gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe")); + } } -#endif } void QuasiAtomic::Shutdown() { -#if NEED_SWAP_MUTEXES - STLDeleteElements(gSwapMutexes); - delete gSwapMutexes; -#endif + if (kNeedSwapMutexes) { + STLDeleteElements(gSwapMutexes); + delete gSwapMutexes; + } } -int64_t QuasiAtomic::Read64(volatile const int64_t* addr) { - int64_t value; -#if NEED_SWAP_MUTEXES - MutexLock mu(Thread::Current(), GetSwapMutex(addr)); - value = *addr; -#elif defined(__arm__) - // Exclusive loads are defined not to tear, clearing the exclusive state isn't necessary. If we - // have LPAE (such as Cortex-A15) then ldrd would suffice. - __asm__ __volatile__("@ QuasiAtomic::Read64\n" - "ldrexd %0, %H0, [%1]" - : "=&r" (value) - : "r" (addr)); -#elif defined(__i386__) - __asm__ __volatile__( - "movq %1, %0\n" - : "=x" (value) - : "m" (*addr)); -#else -#error Unexpected architecture -#endif - return value; +int64_t QuasiAtomic::SwapMutexRead64(volatile const int64_t* addr) { + MutexLock mu(Thread::Current(), *GetSwapMutex(addr)); + return *addr; } -void QuasiAtomic::Write64(volatile int64_t* addr, int64_t value) { -#if NEED_SWAP_MUTEXES - MutexLock mu(Thread::Current(), GetSwapMutex(addr)); +void QuasiAtomic::SwapMutexWrite64(volatile int64_t* addr, int64_t value) { + MutexLock mu(Thread::Current(), *GetSwapMutex(addr)); *addr = value; -#elif defined(__arm__) - // The write is done as a swap so that the cache-line is in the exclusive state for the store. 
If - // we know that ARM architecture has LPAE (such as Cortex-A15) this isn't necessary and strd will - // suffice. - int64_t prev; - int status; - do { - __asm__ __volatile__("@ QuasiAtomic::Write64\n" - "ldrexd %0, %H0, [%3]\n" - "strexd %1, %4, %H4, [%3]" - : "=&r" (prev), "=&r" (status), "+m"(*addr) - : "r" (addr), "r" (value) - : "cc"); - } while (__builtin_expect(status != 0, 0)); -#elif defined(__i386__) - __asm__ __volatile__( - "movq %1, %0" - : "=m" (*addr) - : "x" (value)); -#else -#error Unexpected architecture -#endif } -bool QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) { -#if NEED_SWAP_MUTEXES - MutexLock mu(Thread::Current(), GetSwapMutex(addr)); +bool QuasiAtomic::SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) { + MutexLock mu(Thread::Current(), *GetSwapMutex(addr)); if (*addr == old_value) { *addr = new_value; return true; } return false; -#elif defined(__arm__) - int64_t prev; - int status; - do { - __asm__ __volatile__("@ QuasiAtomic::Cas64\n" - "ldrexd %0, %H0, [%3]\n" - "mov %1, #0\n" - "teq %0, %4\n" - "teqeq %H0, %H4\n" - "strexdeq %1, %5, %H5, [%3]" - : "=&r" (prev), "=&r" (status), "+m"(*addr) - : "r" (addr), "Ir" (old_value), "r" (new_value) - : "cc"); - } while (__builtin_expect(status != 0, 0)); - return prev == old_value; -#elif defined(__i386__) - // The compiler does the right job and works better than inline assembly, especially with -O0 - // compilation. - return __sync_bool_compare_and_swap(addr, old_value, new_value); -#else -#error Unexpected architecture -#endif -} - -bool QuasiAtomic::LongAtomicsUseMutexes() { -#if NEED_SWAP_MUTEXES - return true; -#else - return false; -#endif } } // namespace art diff --git a/runtime/atomic.h b/runtime/atomic.h index cb6f86b..b1e9870 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -18,11 +18,14 @@ #define ART_RUNTIME_ATOMIC_H_ #include <stdint.h> +#include <vector> #include "base/macros.h" namespace art { +class Mutex; + // NOTE: Two "quasiatomic" operations on the exact same memory address // are guaranteed to operate atomically with respect to each other, // but no guarantees are made about quasiatomic operations mixed with @@ -30,25 +33,108 @@ namespace art { // quasiatomic operations that are performed on partially-overlapping // memory. class QuasiAtomic { +#if !defined(__arm__) && !defined(__i386__) + static constexpr bool kNeedSwapMutexes = true; +#else + static constexpr bool kNeedSwapMutexes = false; +#endif + public: static void Startup(); static void Shutdown(); // Reads the 64-bit value at "addr" without tearing. - static int64_t Read64(volatile const int64_t* addr); + static int64_t Read64(volatile const int64_t* addr) { + if (!kNeedSwapMutexes) { + return *addr; + } else { + return SwapMutexRead64(addr); + } + } // Writes to the 64-bit value at "addr" without tearing. - static void Write64(volatile int64_t* addr, int64_t val); + static void Write64(volatile int64_t* addr, int64_t val) { + if (!kNeedSwapMutexes) { + *addr = val; + } else { + SwapMutexWrite64(addr, val); + } + } // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value" // and return true. Otherwise, don't swap, and return false. 
- static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr); + static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) { + if (!kNeedSwapMutexes) { + return __sync_bool_compare_and_swap(addr, old_value, new_value); + } else { + return SwapMutexCas64(old_value, new_value, addr); + } + } // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes? - static bool LongAtomicsUseMutexes(); + static bool LongAtomicsUseMutexes() { + return !kNeedSwapMutexes; + } + + static void MembarLoadStore() { + #if defined(__arm__) + __asm__ __volatile__("dmb ish" : : : "memory"); + #elif defined(__i386__) + __asm__ __volatile__("" : : : "memory"); + #elif defined(__mips__) + __asm__ __volatile__("sync" : : : "memory"); + #else + #error Unexpected architecture + #endif + } + + static void MembarLoadLoad() { + #if defined(__arm__) + __asm__ __volatile__("dmb ish" : : : "memory"); + #elif defined(__i386__) + __asm__ __volatile__("" : : : "memory"); + #elif defined(__mips__) + __asm__ __volatile__("sync" : : : "memory"); + #else + #error Unexpected architecture + #endif + } + + static void MembarStoreStore() { + #if defined(__arm__) + __asm__ __volatile__("dmb ishst" : : : "memory"); + #elif defined(__i386__) + __asm__ __volatile__("" : : : "memory"); + #elif defined(__mips__) + __asm__ __volatile__("sync" : : : "memory"); + #else + #error Unexpected architecture + #endif + } + + static void MembarStoreLoad() { + #if defined(__arm__) + __asm__ __volatile__("dmb ish" : : : "memory"); + #elif defined(__i386__) + __asm__ __volatile__("mfence" : : : "memory"); + #elif defined(__mips__) + __asm__ __volatile__("sync" : : : "memory"); + #else + #error Unexpected architecture + #endif + } private: + static Mutex* GetSwapMutex(const volatile int64_t* addr); + static int64_t SwapMutexRead64(volatile const int64_t* addr); + static void SwapMutexWrite64(volatile int64_t* addr, int64_t val); + static bool SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr); + + // We stripe across a bunch of different mutexes to reduce contention. + static constexpr size_t kSwapMutexCount = 32; + static std::vector<Mutex*>* gSwapMutexes; + DISALLOW_COPY_AND_ASSIGN(QuasiAtomic); }; diff --git a/runtime/atomic_integer.h b/runtime/atomic_integer.h index 132f968..651ca4a 100644 --- a/runtime/atomic_integer.h +++ b/runtime/atomic_integer.h @@ -17,8 +17,7 @@ #ifndef ART_RUNTIME_ATOMIC_INTEGER_H_ #define ART_RUNTIME_ATOMIC_INTEGER_H_ -#include "cutils/atomic.h" -#include "cutils/atomic-inline.h" +#include <stdint.h> namespace art { @@ -28,53 +27,57 @@ class AtomicInteger { explicit AtomicInteger(int32_t value) : value_(value) { } - // Unsafe = operator for non atomic operations on the integer. - void store(int32_t desired) { - value_ = desired; - } - AtomicInteger& operator=(int32_t desired) { - store(desired); + Store(desired); return *this; } - int32_t load() const { + int32_t Load() const { return value_; } operator int32_t() const { - return load(); + return Load(); + } + + int32_t FetchAndAdd(const int32_t value) { + return __sync_fetch_and_add(&value_, value); // Return old_value. } - int32_t fetch_add(const int32_t value) { - return android_atomic_add(value, &value_); + int32_t FetchAndSub(const int32_t value) { + return __sync_fetch_and_sub(&value_, value); // Return old value. } - int32_t fetch_sub(const int32_t value) { - return android_atomic_add(-value, &value_); + int32_t operator++() { // Prefix operator. 
+ return __sync_add_and_fetch(&value_, 1); // Return new value. } - int32_t operator++() { - return android_atomic_inc(&value_) + 1; + int32_t operator++(int32_t) { // Postfix operator. + return __sync_fetch_and_add(&value_, 1); // Return old value. } - int32_t operator++(int32_t) { - return android_atomic_inc(&value_); + int32_t operator--() { // Prefix operator. + return __sync_sub_and_fetch(&value_, 1); // Return new value. } - int32_t operator--() { - return android_atomic_dec(&value_) - 1; + int32_t operator--(int32_t) { // Postfix operator. + return __sync_fetch_and_sub(&value_, 1); // Return old value. } - int32_t operator--(int32_t) { - return android_atomic_dec(&value_); + bool CompareAndSwap(int32_t expected_value, int32_t desired_value) { + return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); } - bool compare_and_swap(int32_t expected_value, int32_t desired_value) { - return android_atomic_cas(expected_value, desired_value, &value_) == 0; + volatile int32_t* Address() { + return &value_; } private: + // Unsafe = operator for non atomic operations on the integer. + void Store(int32_t desired) { + value_ = desired; + } + volatile int32_t value_; }; diff --git a/runtime/base/bounded_fifo.h b/runtime/base/bounded_fifo.h index cb92d40..d04840a 100644 --- a/runtime/base/bounded_fifo.h +++ b/runtime/base/bounded_fifo.h @@ -17,9 +17,6 @@ #ifndef ART_RUNTIME_BASE_BOUNDED_FIFO_H_ #define ART_RUNTIME_BASE_BOUNDED_FIFO_H_ -#include "cutils/atomic.h" -#include "cutils/atomic-inline.h" - namespace art { // A bounded fifo is a fifo which has a bounded size. The power of two version uses a bit mask to @@ -49,7 +46,7 @@ class BoundedFifoPowerOfTwo { void push_back(const T& value) { ++size_; DCHECK_LE(size_, MaxSize); - // Relies on integer overflow behaviour. + // Relies on integer overflow behavior. data_[back_index_++ & mask_] = value; } diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index ec79c55..05e3a83 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -21,8 +21,6 @@ #include "atomic.h" #include "base/logging.h" -#include "cutils/atomic.h" -#include "cutils/atomic-inline.h" #include "mutex-inl.h" #include "runtime.h" #include "scoped_thread_state_change.h" @@ -59,12 +57,12 @@ static struct AllMutexData gAllMutexData[kAllMutexDataSize]; class ScopedAllMutexesLock { public: explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) { - while (!gAllMutexData->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) { + while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, reinterpret_cast<int32_t>(mutex))) { NanoSleep(100); } } ~ScopedAllMutexesLock() { - while (!gAllMutexData->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) { + while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(reinterpret_cast<int32_t>(mutex_), 0)) { NanoSleep(100); } } @@ -176,7 +174,7 @@ void BaseMutex::RecordContention(uint64_t blocked_tid, do { slot = data->cur_content_log_entry; new_slot = (slot + 1) % kContentionLogSize; - } while (!data->cur_content_log_entry.compare_and_swap(slot, new_slot)); + } while (!data->cur_content_log_entry.CompareAndSwap(slot, new_slot)); log[new_slot].blocked_tid = blocked_tid; log[new_slot].owner_tid = owner_tid; log[new_slot].count = 1; @@ -300,11 +298,11 @@ void Mutex::ExclusiveLock(Thread* self) { int32_t cur_state = state_; if (LIKELY(cur_state == 0)) { // Change state from 0 to 1. 
- done = android_atomic_acquire_cas(0, 1, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */); } else { // Failed to acquire, hang up. ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid()); - android_atomic_inc(&num_contenders_); + num_contenders_++; if (futex(&state_, FUTEX_WAIT, 1, NULL, NULL, 0) != 0) { // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning. // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock. @@ -312,9 +310,10 @@ void Mutex::ExclusiveLock(Thread* self) { PLOG(FATAL) << "futex wait failed for " << name_; } } - android_atomic_dec(&num_contenders_); + num_contenders_--; } } while (!done); + QuasiAtomic::MembarStoreLoad(); DCHECK_EQ(state_, 1); exclusive_owner_ = SafeGetTid(self); #else @@ -342,11 +341,12 @@ bool Mutex::ExclusiveTryLock(Thread* self) { int32_t cur_state = state_; if (cur_state == 0) { // Change state from 0 to 1. - done = android_atomic_acquire_cas(0, 1, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */); } else { return false; } } while (!done); + QuasiAtomic::MembarStoreLoad(); DCHECK_EQ(state_, 1); exclusive_owner_ = SafeGetTid(self); #else @@ -385,10 +385,11 @@ void Mutex::ExclusiveUnlock(Thread* self) { do { int32_t cur_state = state_; if (LIKELY(cur_state == 1)) { + QuasiAtomic::MembarStoreStore(); // We're no longer the owner. exclusive_owner_ = 0; // Change state to 0. - done = android_atomic_release_cas(cur_state, 0, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */); if (LIKELY(done)) { // Spurious fail? // Wake a contender if (UNLIKELY(num_contenders_ > 0)) { @@ -407,6 +408,7 @@ void Mutex::ExclusiveUnlock(Thread* self) { } } } while (!done); + QuasiAtomic::MembarStoreLoad(); #else CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_)); #endif @@ -468,11 +470,11 @@ void ReaderWriterMutex::ExclusiveLock(Thread* self) { int32_t cur_state = state_; if (LIKELY(cur_state == 0)) { // Change state from 0 to -1. - done = android_atomic_acquire_cas(0, -1, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state*/, -1 /* new state */); } else { // Failed to acquire, hang up. ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid()); - android_atomic_inc(&num_pending_writers_); + num_pending_writers_++; if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) { // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning. // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock. @@ -480,7 +482,7 @@ void ReaderWriterMutex::ExclusiveLock(Thread* self) { PLOG(FATAL) << "futex wait failed for " << name_; } } - android_atomic_dec(&num_pending_writers_); + num_pending_writers_--; } } while (!done); DCHECK_EQ(state_, -1); @@ -504,7 +506,7 @@ void ReaderWriterMutex::ExclusiveUnlock(Thread* self) { // We're no longer the owner. exclusive_owner_ = 0; // Change state from -1 to 0. - done = android_atomic_release_cas(-1, 0, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */); if (LIKELY(done)) { // cmpxchg may fail due to noise? // Wake any waiters. if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) { @@ -531,7 +533,7 @@ bool ReaderWriterMutex::ExclusiveLockWithTimeout(Thread* self, int64_t ms, int32 int32_t cur_state = state_; if (cur_state == 0) { // Change state from 0 to -1. 
- done = android_atomic_acquire_cas(0, -1, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, -1 /* new state */); } else { // Failed to acquire, hang up. timespec now_abs_ts; @@ -541,10 +543,10 @@ bool ReaderWriterMutex::ExclusiveLockWithTimeout(Thread* self, int64_t ms, int32 return false; // Timed out. } ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid()); - android_atomic_inc(&num_pending_writers_); + num_pending_writers_++; if (futex(&state_, FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) { if (errno == ETIMEDOUT) { - android_atomic_dec(&num_pending_writers_); + num_pending_writers_--; return false; // Timed out. } else if ((errno != EAGAIN) && (errno != EINTR)) { // EAGAIN and EINTR both indicate a spurious failure, @@ -553,7 +555,7 @@ bool ReaderWriterMutex::ExclusiveLockWithTimeout(Thread* self, int64_t ms, int32 PLOG(FATAL) << "timed futex wait failed for " << name_; } } - android_atomic_dec(&num_pending_writers_); + num_pending_writers_--; } } while (!done); exclusive_owner_ = SafeGetTid(self); @@ -583,7 +585,7 @@ bool ReaderWriterMutex::SharedTryLock(Thread* self) { int32_t cur_state = state_; if (cur_state >= 0) { // Add as an extra reader. - done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0; + done = __sync_bool_compare_and_swap(&state_, cur_state, cur_state + 1); } else { // Owner holds it exclusively. return false; @@ -666,13 +668,13 @@ void ConditionVariable::Broadcast(Thread* self) { DCHECK_EQ(guard_.GetExclusiveOwnerTid(), SafeGetTid(self)); #if ART_USE_FUTEXES if (num_waiters_ > 0) { - android_atomic_inc(&sequence_); // Indicate the broadcast occurred. + sequence_++; // Indicate the broadcast occurred. bool done = false; do { int32_t cur_sequence = sequence_; // Requeue waiters onto mutex. The waiter holds the contender count on the mutex high ensuring // mutex unlocks will awaken the requeued waiter thread. - done = futex(&sequence_, FUTEX_CMP_REQUEUE, 0, + done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()), &guard_.state_, cur_sequence) != -1; if (!done) { @@ -692,10 +694,10 @@ void ConditionVariable::Signal(Thread* self) { guard_.AssertExclusiveHeld(self); #if ART_USE_FUTEXES if (num_waiters_ > 0) { - android_atomic_inc(&sequence_); // Indicate a signal occurred. + sequence_++; // Indicate a signal occurred. // Futex wake 1 waiter who will then come and in contend on mutex. It'd be nice to requeue them // to avoid this, however, requeueing can only move all waiters. - int num_woken = futex(&sequence_, FUTEX_WAKE, 1, NULL, NULL, 0); + int num_woken = futex(sequence_.Address(), FUTEX_WAKE, 1, NULL, NULL, 0); // Check something was woken or else we changed sequence_ before they had chance to wait. CHECK((num_woken == 0) || (num_woken == 1)); } @@ -716,11 +718,11 @@ void ConditionVariable::WaitHoldingLocks(Thread* self) { #if ART_USE_FUTEXES num_waiters_++; // Ensure the Mutex is contended so that requeued threads are awoken. - android_atomic_inc(&guard_.num_contenders_); + guard_.num_contenders_++; guard_.recursion_count_ = 1; int32_t cur_sequence = sequence_; guard_.ExclusiveUnlock(self); - if (futex(&sequence_, FUTEX_WAIT, cur_sequence, NULL, NULL, 0) != 0) { + if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, NULL, NULL, 0) != 0) { // Futex failed, check it is an expected error. // EAGAIN == EWOULDBLK, so we let the caller try again. // EINTR implies a signal was sent to this thread. 
@@ -733,7 +735,7 @@ void ConditionVariable::WaitHoldingLocks(Thread* self) { num_waiters_--; // We awoke and so no longer require awakes from the guard_'s unlock. CHECK_GE(guard_.num_contenders_, 0); - android_atomic_dec(&guard_.num_contenders_); + guard_.num_contenders_--; #else guard_.recursion_count_ = 0; CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &guard_.mutex_)); @@ -751,11 +753,11 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { InitTimeSpec(false, CLOCK_REALTIME, ms, ns, &rel_ts); num_waiters_++; // Ensure the Mutex is contended so that requeued threads are awoken. - android_atomic_inc(&guard_.num_contenders_); + guard_.num_contenders_++; guard_.recursion_count_ = 1; int32_t cur_sequence = sequence_; guard_.ExclusiveUnlock(self); - if (futex(&sequence_, FUTEX_WAIT, cur_sequence, &rel_ts, NULL, 0) != 0) { + if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, &rel_ts, NULL, 0) != 0) { if (errno == ETIMEDOUT) { // Timed out we're done. } else if ((errno == EAGAIN) || (errno == EINTR)) { @@ -769,7 +771,7 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { num_waiters_--; // We awoke and so no longer require awakes from the guard_'s unlock. CHECK_GE(guard_.num_contenders_, 0); - android_atomic_dec(&guard_.num_contenders_); + guard_.num_contenders_--; #else #ifdef HAVE_TIMEDWAIT_MONOTONIC #define TIMEDWAIT pthread_cond_timedwait_monotonic diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index b894c0a..1c1dcaf 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -191,7 +191,7 @@ class LOCKABLE Mutex : public BaseMutex { // Exclusive owner. volatile uint64_t exclusive_owner_; // Number of waiting contenders. - volatile int32_t num_contenders_; + AtomicInteger num_contenders_; #else pthread_mutex_t mutex_; #endif @@ -304,7 +304,7 @@ class LOCKABLE ReaderWriterMutex : public BaseMutex { // Pending readers. volatile int32_t num_pending_readers_; // Pending writers. - volatile int32_t num_pending_writers_; + AtomicInteger num_pending_writers_; #else pthread_rwlock_t rwlock_; #endif @@ -339,7 +339,7 @@ class ConditionVariable { // their Mutex and another thread takes it and signals, the waiting thread observes that sequence_ // changed and doesn't enter the wait. Modified while holding guard_, but is read by futex wait // without guard_ held. - volatile int32_t sequence_; + AtomicInteger sequence_; // Number of threads that have come into to wait, not the length of the waiters on the futex as // waiters may have been requeued onto guard_. Guarded by guard_. volatile int32_t num_waiters_; diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h index 8fa5b86..02e01b8 100644 --- a/runtime/gc/accounting/atomic_stack.h +++ b/runtime/gc/accounting/atomic_stack.h @@ -68,7 +68,7 @@ class AtomicStack { // Stack overflow. return false; } - } while (!back_index_.compare_and_swap(index, index + 1)); + } while (!back_index_.CompareAndSwap(index, index + 1)); begin_[index] = value; return true; } @@ -93,7 +93,7 @@ class AtomicStack { // Take an item from the front of the stack. T PopFront() { int32_t index = front_index_; - DCHECK_LT(index, back_index_.load()); + DCHECK_LT(index, back_index_.Load()); front_index_ = front_index_ + 1; return begin_[index]; } @@ -101,7 +101,7 @@ class AtomicStack { // Pop a number of elements. 
void PopBackCount(int32_t n) { DCHECK_GE(Size(), static_cast<size_t>(n)); - back_index_.fetch_sub(n); + back_index_.FetchAndSub(n); } bool IsEmpty() const { @@ -132,11 +132,11 @@ class AtomicStack { } void Sort() { - int32_t start_back_index = back_index_.load(); - int32_t start_front_index = front_index_.load(); + int32_t start_back_index = back_index_.Load(); + int32_t start_front_index = front_index_.Load(); std::sort(Begin(), End()); - CHECK_EQ(start_back_index, back_index_.load()); - CHECK_EQ(start_front_index, front_index_.load()); + CHECK_EQ(start_back_index, back_index_.Load()); + CHECK_EQ(start_front_index, front_index_.Load()); if (kIsDebugBuild) { debug_is_sorted_ = true; } diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 28cc510..cae2a54 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -1109,8 +1109,8 @@ void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) { // AllocSpace::FreeList clears the value in ptrs, so perform after clearing the live bit size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs); heap->RecordFree(freed_objects, freed_bytes); - mark_sweep->freed_objects_.fetch_add(freed_objects); - mark_sweep->freed_bytes_.fetch_add(freed_bytes); + mark_sweep->freed_objects_.FetchAndAdd(freed_objects); + mark_sweep->freed_bytes_.FetchAndAdd(freed_bytes); } void MarkSweep::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) { @@ -1192,10 +1192,10 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma VLOG(heap) << "Freed " << freed_objects << "/" << count << " objects with size " << PrettySize(freed_bytes); heap_->RecordFree(freed_objects + freed_large_objects, freed_bytes + freed_large_object_bytes); - freed_objects_.fetch_add(freed_objects); - freed_large_objects_.fetch_add(freed_large_objects); - freed_bytes_.fetch_add(freed_bytes); - freed_large_object_bytes_.fetch_add(freed_large_object_bytes); + freed_objects_.FetchAndAdd(freed_objects); + freed_large_objects_.FetchAndAdd(freed_large_objects); + freed_bytes_.FetchAndAdd(freed_bytes); + freed_large_object_bytes_.FetchAndAdd(freed_large_object_bytes); timings_.EndSplit(); timings_.StartSplit("ResetStack"); @@ -1267,8 +1267,8 @@ void MarkSweep::SweepLargeObjects(bool swap_bitmaps) { ++freed_objects; } } - freed_large_objects_.fetch_add(freed_objects); - freed_large_object_bytes_.fetch_add(freed_bytes); + freed_large_objects_.FetchAndAdd(freed_objects); + freed_large_object_bytes_.FetchAndAdd(freed_bytes); GetHeap()->RecordFree(freed_objects, freed_bytes); } diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index f29eadb..a4f7121 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -236,8 +236,8 @@ void SemiSpace::ReclaimPhase() { int freed_bytes = from_bytes - to_bytes; int freed_objects = from_objects - to_objects; CHECK_GE(freed_bytes, 0); - freed_bytes_.fetch_add(freed_bytes); - freed_objects_.fetch_add(freed_objects); + freed_bytes_.FetchAndAdd(freed_bytes); + freed_objects_.FetchAndAdd(freed_objects); heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes)); timings_.StartSplit("PreSweepingGcVerification"); @@ -332,7 +332,7 @@ Object* SemiSpace::MarkObject(Object* obj) { // If out of space, fall back to the to-space. 
forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated); } else { - GetHeap()->num_bytes_allocated_.fetch_add(bytes_promoted); + GetHeap()->num_bytes_allocated_.FetchAndAdd(bytes_promoted); bytes_promoted_ += bytes_promoted; // Mark forward_address on the live bit map. accounting::SpaceBitmap* live_bitmap = non_moving_space->GetLiveBitmap(); @@ -446,8 +446,8 @@ void SemiSpace::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) { Locks::heap_bitmap_lock_->AssertExclusiveHeld(self); size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs); heap->RecordFree(num_ptrs, freed_bytes); - gc->freed_objects_.fetch_add(num_ptrs); - gc->freed_bytes_.fetch_add(freed_bytes); + gc->freed_objects_.FetchAndAdd(num_ptrs); + gc->freed_bytes_.FetchAndAdd(freed_bytes); } void SemiSpace::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) { @@ -526,8 +526,8 @@ void SemiSpace::SweepLargeObjects(bool swap_bitmaps) { ++freed_objects; } } - freed_large_objects_.fetch_add(freed_objects); - freed_large_object_bytes_.fetch_add(freed_bytes); + freed_large_objects_.FetchAndAdd(freed_objects); + freed_large_object_bytes_.FetchAndAdd(freed_bytes); GetHeap()->RecordFree(freed_objects, freed_bytes); } diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 9fb5760..af1b26b 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -61,7 +61,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas pre_fence_visitor(obj); DCHECK_GT(bytes_allocated, 0u); const size_t new_num_bytes_allocated = - static_cast<size_t>(num_bytes_allocated_.fetch_add(bytes_allocated)) + bytes_allocated; + static_cast<size_t>(num_bytes_allocated_.FetchAndAdd(bytes_allocated)) + bytes_allocated; // TODO: Deprecate. if (kInstrumented) { if (Runtime::Current()->HasStatsEnabled()) { @@ -200,7 +200,7 @@ inline Heap::AllocationTimer::~AllocationTimer() { // Only if the allocation succeeded, record the time. if (allocated_obj != nullptr) { uint64_t allocation_end_time = NanoTime() / kTimeAdjust; - heap_->total_allocation_time_.fetch_add(allocation_end_time - allocation_start_time_); + heap_->total_allocation_time_.FetchAndAdd(allocation_end_time - allocation_start_time_); } } }; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 61c66e7..e08106b 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -413,13 +413,13 @@ void Heap::AddSpace(space::Space* space) { void Heap::RegisterGCAllocation(size_t bytes) { if (this != nullptr) { - gc_memory_overhead_.fetch_add(bytes); + gc_memory_overhead_.FetchAndAdd(bytes); } } void Heap::RegisterGCDeAllocation(size_t bytes) { if (this != nullptr) { - gc_memory_overhead_.fetch_sub(bytes); + gc_memory_overhead_.FetchAndSub(bytes); } } @@ -802,7 +802,7 @@ void Heap::DumpSpaces(std::ostream& stream) { void Heap::VerifyObjectBody(const mirror::Object* obj) { CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj; // Ignore early dawn of the universe verifications. 
- if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.load()) < 10 * KB)) { + if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) { return; } const byte* raw_addr = reinterpret_cast<const byte*>(obj) + @@ -847,7 +847,8 @@ void Heap::VerifyHeap() { void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) { DCHECK_LE(freed_bytes, static_cast<size_t>(num_bytes_allocated_)); - num_bytes_allocated_.fetch_sub(freed_bytes); + num_bytes_allocated_.FetchAndSub(freed_bytes); + if (Runtime::Current()->HasStatsEnabled()) { RuntimeStats* thread_stats = Thread::Current()->GetStats(); thread_stats->freed_objects += freed_objects; @@ -2082,7 +2083,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) { native_need_to_run_finalization_ = false; } // Total number of native bytes allocated. - native_bytes_allocated_.fetch_add(bytes); + native_bytes_allocated_.FetchAndAdd(bytes); if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) { collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull; @@ -2118,7 +2119,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) { void Heap::RegisterNativeFree(JNIEnv* env, int bytes) { int expected_size, new_size; do { - expected_size = native_bytes_allocated_.load(); + expected_size = native_bytes_allocated_.Load(); new_size = expected_size - bytes; if (UNLIKELY(new_size < 0)) { ScopedObjectAccess soa(env); @@ -2127,7 +2128,7 @@ void Heap::RegisterNativeFree(JNIEnv* env, int bytes) { "registered as allocated", bytes, expected_size).c_str()); break; } - } while (!native_bytes_allocated_.compare_and_swap(expected_size, new_size)); + } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size)); } int64_t Heap::GetTotalMemory() const { diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 82e96a4..ac20972 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -44,8 +44,8 @@ inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) { mirror::Object* ret = AllocNonvirtualWithoutAccounting(num_bytes); if (ret != nullptr) { - objects_allocated_.fetch_add(1); - bytes_allocated_.fetch_add(num_bytes); + objects_allocated_.FetchAndAdd(1); + bytes_allocated_.FetchAndAdd(num_bytes); } return ret; } diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index 7ea202c..d5bc667 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -172,7 +172,7 @@ bool BumpPointerSpace::IsEmpty() const { uint64_t BumpPointerSpace::GetBytesAllocated() { // Start out pre-determined amount (blocks which are not being allocated into). - uint64_t total = static_cast<uint64_t>(bytes_allocated_.load()); + uint64_t total = static_cast<uint64_t>(bytes_allocated_.Load()); Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -190,7 +190,7 @@ uint64_t BumpPointerSpace::GetBytesAllocated() { uint64_t BumpPointerSpace::GetObjectsAllocated() { // Start out pre-determined amount (blocks which are not being allocated into). 
- uint64_t total = static_cast<uint64_t>(objects_allocated_.load()); + uint64_t total = static_cast<uint64_t>(objects_allocated_.Load()); Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -207,8 +207,8 @@ uint64_t BumpPointerSpace::GetObjectsAllocated() { } void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { - objects_allocated_.fetch_add(thread->thread_local_objects_); - bytes_allocated_.fetch_add(thread->thread_local_pos_ - thread->thread_local_start_); + objects_allocated_.FetchAndAdd(thread->thread_local_objects_); + bytes_allocated_.FetchAndAdd(thread->thread_local_pos_ - thread->thread_local_start_); thread->SetTLAB(nullptr, nullptr); } diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index c6177bd..4777cc6 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -227,7 +227,7 @@ ImageSpace* ImageSpace::Init(const char* image_file_name, bool validate_oat_file *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str()); return nullptr; } - size_t bitmap_index = bitmap_index_.fetch_add(1); + size_t bitmap_index = bitmap_index_.FetchAndAdd(1); std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name, bitmap_index)); UniquePtr<accounting::SpaceBitmap> bitmap( diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 4ad9c63..47c1899 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -407,9 +407,9 @@ static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) { void Instrumentation::InstrumentQuickAllocEntryPoints() { // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code // should be guarded by a lock. - DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.load(), 0); + DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.Load(), 0); const bool enable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.fetch_add(1) == 0; + quick_alloc_entry_points_instrumentation_counter_.FetchAndAdd(1) == 0; if (enable_instrumentation) { // Instrumentation wasn't enabled so enable it. SetQuickAllocEntryPointsInstrumented(true); @@ -420,9 +420,9 @@ void Instrumentation::InstrumentQuickAllocEntryPoints() { void Instrumentation::UninstrumentQuickAllocEntryPoints() { // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code // should be guarded by a lock. - DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.load(), 0); + DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.Load(), 0); const bool disable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.fetch_sub(1) == 1; + quick_alloc_entry_points_instrumentation_counter_.FetchAndSub(1) == 1; if (disable_instrumentation) { SetQuickAllocEntryPointsInstrumented(false); ResetQuickAllocEntryPoints(); diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 99c85bd..942c275 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -245,7 +245,7 @@ JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* // If access checks are required then the dex-to-dex compiler and analysis of // whether the class has final fields hasn't been performed. Conservatively // perform the memory barrier now. 
- ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreLoad(); } if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); @@ -261,7 +261,7 @@ JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* HANDLE_INSTRUCTION_END(); HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) { - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreLoad(); JValue result; if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index 675095f..75041ea 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -169,7 +169,7 @@ JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem // If access checks are required then the dex-to-dex compiler and analysis of // whether the class has final fields hasn't been performed. Conservatively // perform the memory barrier now. - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreLoad(); } if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); @@ -183,7 +183,7 @@ JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem } case Instruction::RETURN_VOID_BARRIER: { PREAMBLE(); - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreLoad(); JValue result; if (UNLIKELY(self->TestAllFlags())) { CheckSuspend(self); diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 7ac2c8c..9161bc5 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -253,11 +253,40 @@ inline size_t Object::SizeOf() const { return result; } +inline uint32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) const { + VerifyObject(this); + const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value(); + const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr); + if (UNLIKELY(is_volatile)) { + int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr))); + QuasiAtomic::MembarLoadLoad(); + return result; + } else { + return *word_addr; + } +} + +inline void Object::SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile, + bool this_is_valid) { + if (this_is_valid) { + VerifyObject(this); + } + byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); + uint32_t* word_addr = reinterpret_cast<uint32_t*>(raw_addr); + if (UNLIKELY(is_volatile)) { + QuasiAtomic::MembarStoreStore(); // Ensure this store occurs after others in the queue. + *word_addr = new_value; + QuasiAtomic::MembarStoreLoad(); // Ensure this store occurs before any loads. 
+ } else { + *word_addr = new_value; + } +} + inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) { VerifyObject(this); byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); - int32_t* addr = reinterpret_cast<int32_t*>(raw_addr); - return android_atomic_release_cas(old_value, new_value, addr) == 0; + volatile uint32_t* addr = reinterpret_cast<volatile uint32_t*>(raw_addr); + return __sync_bool_compare_and_swap(addr, old_value, new_value); } inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const { @@ -266,7 +295,7 @@ inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr); if (UNLIKELY(is_volatile)) { uint64_t result = QuasiAtomic::Read64(addr); - ANDROID_MEMBAR_FULL(); + QuasiAtomic::MembarLoadLoad(); return result; } else { return *addr; @@ -278,9 +307,13 @@ inline void Object::SetField64(MemberOffset field_offset, uint64_t new_value, bo byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); int64_t* addr = reinterpret_cast<int64_t*>(raw_addr); if (UNLIKELY(is_volatile)) { - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreStore(); // Ensure this store occurs after others in the queue. QuasiAtomic::Write64(addr, new_value); - // Post-store barrier not required due to use of atomic op or mutex. + if (!QuasiAtomic::LongAtomicsUseMutexes()) { + QuasiAtomic::MembarStoreLoad(); // Ensure this store occurs before any loads. + } else { + // Fence from from mutex is enough. + } } else { *addr = new_value; } diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index 008a173..bdb3250 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -89,10 +89,10 @@ int32_t Object::GenerateIdentityHashCode() { static AtomicInteger seed(987654321 + std::time(nullptr)); int32_t expected_value, new_value; do { - expected_value = static_cast<uint32_t>(seed.load()); + expected_value = static_cast<uint32_t>(seed.Load()); new_value = expected_value * 1103515245 + 12345; } while ((expected_value & LockWord::kHashMask) == 0 || - !seed.compare_and_swap(expected_value, new_value)); + !seed.CompareAndSwap(expected_value, new_value)); return expected_value & LockWord::kHashMask; } diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index fe89b7e..058aee7 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -187,37 +187,10 @@ class MANAGED Object { return reinterpret_cast<Object**>(reinterpret_cast<byte*>(this) + field_offset.Int32Value()); } - uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const { - VerifyObject(this); - const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value(); - const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr); - if (UNLIKELY(is_volatile)) { - return android_atomic_acquire_load(word_addr); - } else { - return *word_addr; - } - } + uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const; void SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile, - bool this_is_valid = true) { - if (this_is_valid) { - VerifyObject(this); - } - byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); - uint32_t* word_addr = reinterpret_cast<uint32_t*>(raw_addr); - if (UNLIKELY(is_volatile)) { - /* - * TODO: add an android_atomic_synchronization_store() function and - * use it in the 32-bit volatile set handlers. 
On some platforms we - * can use a fast atomic instruction and avoid the barriers. - */ - ANDROID_MEMBAR_STORE(); - *word_addr = new_value; - ANDROID_MEMBAR_FULL(); - } else { - *word_addr = new_value; - } - } + bool this_is_valid = true); bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value); diff --git a/runtime/monitor.cc b/runtime/monitor.cc index ef9a9ce..4186693 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -98,12 +98,12 @@ Monitor::Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code) int32_t Monitor::GetHashCode() { while (!HasHashCode()) { - if (hash_code_.compare_and_swap(0, mirror::Object::GenerateIdentityHashCode())) { + if (hash_code_.CompareAndSwap(0, mirror::Object::GenerateIdentityHashCode())) { break; } } DCHECK(HasHashCode()); - return hash_code_.load(); + return hash_code_.Load(); } bool Monitor::Install(Thread* self) { @@ -660,6 +660,7 @@ void Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { case LockWord::kUnlocked: { LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0)); if (sirt_obj->CasLockWord(lock_word, thin_locked)) { + QuasiAtomic::MembarLoadLoad(); return; // Success! } continue; // Go again. diff --git a/runtime/monitor.h b/runtime/monitor.h index bfd8545..16e9410 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -105,7 +105,7 @@ class Monitor { bool IsLocked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool HasHashCode() const { - return hash_code_.load() != 0; + return hash_code_.Load() != 0; } static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word, diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc index 553aeb8..269a4a3 100644 --- a/runtime/native/java_lang_reflect_Field.cc +++ b/runtime/native/java_lang_reflect_Field.cc @@ -222,7 +222,7 @@ static void SetFieldValue(ScopedFastNativeObjectAccess& soa, mirror::Object* o, // Special handling for final fields on SMP systems. // We need a store/store barrier here (JMM requirement). 
if (f->IsFinal()) { - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreLoad(); } } diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc index 2c6d281..b5fc7e7 100644 --- a/runtime/native/sun_misc_Unsafe.cc +++ b/runtime/native/sun_misc_Unsafe.cc @@ -86,7 +86,7 @@ static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong o static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) { ScopedFastNativeObjectAccess soa(env); mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj); - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreStore(); obj->SetField32(MemberOffset(offset), newValue, false); } @@ -117,7 +117,7 @@ static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) { ScopedFastNativeObjectAccess soa(env); mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj); - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreStore(); obj->SetField64(MemberOffset(offset), newValue, false); } @@ -153,7 +153,7 @@ static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong ScopedFastNativeObjectAccess soa(env); mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj); mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue); - ANDROID_MEMBAR_STORE(); + QuasiAtomic::MembarStoreStore(); obj->SetFieldObject(MemberOffset(offset), newValue, false); } diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc index 1b22361..2029d4b 100644 --- a/runtime/thread_pool_test.cc +++ b/runtime/thread_pool_test.cc @@ -94,7 +94,7 @@ TEST_F(ThreadPoolTest, StopStart) { EXPECT_EQ(0, bad_count); // Allow tasks to finish up and delete themselves. thread_pool.StartWorkers(self); - while (count.load() != num_tasks && bad_count.load() != 1) { + while (count.Load() != num_tasks && bad_count.Load() != 1) { usleep(200); } thread_pool.StopWorkers(self); |
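
The sun_misc_Unsafe changes above draw a distinction between a fully volatile store (a StoreStore fence before the write and a StoreLoad fence after, as in the volatile path of SetField32) and an ordered, lazySet-style store that only needs the leading StoreStore fence. A hedged sketch of the two publication idioms follows; the fence helpers are illustrative stand-ins for the QuasiAtomic wrappers introduced by the patch, not the wrappers themselves.

// Sketch of the two store idioms, with self-contained fence helpers.
#include <stdint.h>

static inline void StoreStoreFence() {
#if defined(__arm__)
  __asm__ __volatile__("dmb ishst" : : : "memory");  // orders earlier stores before later stores
#else
  __sync_synchronize();                              // conservative full barrier elsewhere
#endif
}

static inline void StoreLoadFence() {
#if defined(__i386__) || defined(__x86_64__)
  __asm__ __volatile__("mfence" : : : "memory");
#else
  __sync_synchronize();
#endif
}

// Volatile-style store: earlier writes are published before the field, and the
// field's store is ordered before any subsequent loads.
void VolatileStore32(volatile int32_t* addr, int32_t value) {
  StoreStoreFence();
  *addr = value;
  StoreLoadFence();
}

// Ordered ("lazySet"-style) store: only the leading StoreStore fence, so the
// write is published in order but later loads may still be hoisted above it.
void OrderedStore32(volatile int32_t* addr, int32_t value) {
  StoreStoreFence();
  *addr = value;
}
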