Diffstat (limited to 'libc/private/bionic_atomic_arm.h')
-rw-r--r--  libc/private/bionic_atomic_arm.h  284
1 file changed, 284 insertions, 0 deletions
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
new file mode 100644
index 0000000..275c1c9
--- /dev/null
+++ b/libc/private/bionic_atomic_arm.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_ARM_H
+#define BIONIC_ATOMIC_ARM_H
+
+#include <machine/cpu-features.h>
+
+/* Some of the hardware instructions used below are not available in Thumb-1
+ * mode (they are available if you build in ARM or Thumb-2 mode). To work
+ * around this, we use the same technique as libatomic_ops: temporarily
+ * switch to ARM mode, perform the operation, then switch back to Thumb-1.
+ *
+ * This costs two 'bx' jumps, just like a normal function call, but
+ * everything else stays inlined: we avoid loading or computing the
+ * function's address, and prevent a little I-cache thrashing too.
+ *
+ * However, it is highly recommended to avoid compiling any C library source
+ * file that uses these functions in Thumb-1 mode.
+ *
+ * Three helper macros implement this:
+ */
+#if defined(__thumb__) && !defined(__thumb2__)
+# define __ATOMIC_SWITCH_TO_ARM \
+ "adr r3, 5f\n" \
+ "bx r3\n" \
+ ".align\n" \
+ ".arm\n" \
+ "5:\n"
+/* note: the leading \n below is intentional */
+# define __ATOMIC_SWITCH_TO_THUMB \
+ "\n" \
+ "adr r3, 6f\n" \
+ "bx r3\n" \
+ ".thumb" \
+ "6:\n"
+
+# define __ATOMIC_CLOBBERS "r3", /* clobbered registers (note the trailing comma) */
+
+/* Warn the user that ARM mode should really be preferred! */
+# warning Rebuilding this source file in ARM mode is highly recommended for performance!!
+
+#else
+# define __ATOMIC_SWITCH_TO_ARM /* nothing */
+# define __ATOMIC_SWITCH_TO_THUMB /* nothing */
+# define __ATOMIC_CLOBBERS /* nothing */
+#endif
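+
+/* For illustration: in Thumb-1 mode, an asm statement written as
+ *
+ *   __ATOMIC_SWITCH_TO_ARM "<ARM-only instructions>" __ATOMIC_SWITCH_TO_THUMB
+ *
+ * assembles to roughly the following sequence:
+ *
+ *   adr r3, 5f    @ take the address of the ARM island below
+ *   bx r3         @ branch there, switching to ARM mode
+ *   .align
+ *   .arm
+ * 5: <ARM-only instructions>
+ *   adr r3, 6f    @ take the address of the Thumb continuation
+ *   bx r3         @ branch back, switching to Thumb mode
+ *   .thumb
+ * 6:              @ execution continues here in Thumb-1
+ */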
+
+
+/* Define a full memory barrier; this is only needed if we build the
+ * platform for a multi-core device. For the record, a 'dmb' instruction
+ * on a Nexus One device can take up to 180 ns even when it is completely
+ * unnecessary on that device.
+ *
+ * NOTE: This is where the platform and NDK atomic headers are going to
+ * diverge. With the NDK, we don't know whether the generated code
+ * is going to run on a single-core or multi-core device, so we
+ * need to be cautious.
+ *
+ * Fortunately, we can use the kernel helper function that is
+ * mapped at address 0xffff0fa0 in every user process, and that
+ * provides a device-specific barrier operation.
+ *
+ * I.e. on single-core devices the helper returns immediately, while
+ * on multi-core devices it uses 'dmb' or any other means to
+ * perform a full memory barrier.
+ *
+ * There are three cases to consider for the platform:
+ *
+ * - multi-core ARMv7-A => use the 'dmb' hardware instruction
+ * - multi-core ARMv6 => use a coprocessor write (mcr)
+ * - single-core ARMv5TE/6/7 => do not use any hardware barrier
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+
+/* Sanity check: multi-core is only supported starting from ARMv6. */
+# if __ARM_ARCH__ < 6
+# error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
+# endif
+
+# ifdef __ARM_HAVE_DMB
+/* For ARMv7-A, we can use the 'dmb' instruction directly */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+ /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
+ * bother with __ATOMIC_SWITCH_TO_ARM */
+ __asm__ __volatile__ ( "dmb" : : : "memory" );
+}
+# else /* !__ARM_HAVE_DMB */
+/* Otherwise, i.e. for multi-core ARMv6, we need to write to the CP15
+ * coprocessor, which requires a general-purpose register and is
+ * therefore slightly less efficient.
+ */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "mcr p15, 0, %0, c7, c10, 5"
+ __ATOMIC_SWITCH_TO_THUMB
+ : : "r" (0) : __ATOMIC_CLOBBERS "memory");
+}
+# endif /* !__ARM_HAVE_DMB */
+#else /* !ANDROID_SMP */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+ /* A simple compiler barrier */
+ __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif /* !ANDROID_SMP */
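+
+/* A minimal usage sketch (hypothetical, not part of this header's API),
+ * kept under #if 0 so it never affects the build: a store-release built
+ * from the barrier above, making all prior writes visible before the
+ * flag store that publishes them.
+ */
+#if 0
+__ATOMIC_INLINE__ void
+__example_publish_flag(volatile int32_t* flag)
+{
+ /* Order earlier stores before the flag becomes visible. */
+ __bionic_memory_barrier();
+ *flag = 1;
+}
+#endif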
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success and 1 on failure; the opposite convention is
+ * typically used on other platforms.
+ *
+ * There are two cases to consider:
+ *
+ * - ARMv6+ => use LDREX/STREX instructions
+ * - < ARMv6 => use the kernel helper function mapped at 0xffff0fc0
+ *
+ * (LDREX/STREX are only available starting from ARMv6.)
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+ int32_t prev, status;
+ do {
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "ldrex %0, [%3]\n"
+ "mov %1, #0\n"
+ "teq %0, %4\n"
+#ifdef __thumb2__
+ "it eq\n"
+#endif
+ "strexeq %1, %5, [%3]"
+ __ATOMIC_SWITCH_TO_THUMB
+ : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+ : "r" (ptr), "Ir" (old_value), "r" (new_value)
+ : __ATOMIC_CLOBBERS "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev != old_value;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+
+/* Use the handy kernel helper function mapped at 0xffff0fc0 */
+typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+
+__ATOMIC_INLINE__ int
+__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+ /* Note: the kernel function returns 0 on success too */
+ return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+ return __kernel_cmpxchg(old_value, new_value, ptr);
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
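+
+/* Usage sketch (hypothetical, not part of this header's API), kept under
+ * #if 0: because __bionic_cmpxchg() returns 0 on success, a read-modify-write
+ * loop retries while it returns non-zero. This mirrors the pre-ARMv6
+ * fallbacks for __bionic_atomic_inc/dec further below.
+ */
+#if 0
+__ATOMIC_INLINE__ int32_t
+__example_atomic_add(int32_t amount, volatile int32_t* ptr)
+{
+ int32_t prev, status;
+ do {
+ prev = *ptr;
+ status = __bionic_cmpxchg(prev, prev + amount, ptr);
+ } while (__builtin_expect(status != 0, 0));
+ return prev; /* old value, like the other primitives in this file */
+}
+#endif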
+
+/* Swap operation, without any explicit barriers.
+ * There are again two similar cases to consider:
+ *
+ * - ARMv6+ => use LDREX/STREX
+ * - < ARMv6 => use SWP instead
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+ int32_t prev, status;
+ do {
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "ldrex %0, [%3]\n"
+ "strex %1, %4, [%3]"
+ __ATOMIC_SWITCH_TO_THUMB
+ : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+ : "r" (ptr), "r" (new_value)
+ : __ATOMIC_CLOBBERS "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+ int32_t prev;
+ /* NOTE: SWP is an ARM-only instruction (it has no Thumb encoding),
+ * so switch to ARM mode around it when building in Thumb-1. */
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "swp %0, %2, [%3]"
+ __ATOMIC_SWITCH_TO_THUMB
+ : "=&r" (prev), "+m" (*ptr)
+ : "r" (new_value), "r" (ptr)
+ : __ATOMIC_CLOBBERS "cc");
+ return prev;
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
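+
+/* Usage sketch (hypothetical, not part of this header's API), kept under
+ * #if 0: a trivial spinlock built on __bionic_swap(). The swap itself has
+ * no barrier, so explicit __bionic_memory_barrier() calls are needed on
+ * both the acquire and the release side.
+ */
+#if 0
+__ATOMIC_INLINE__ void
+__example_spin_lock(volatile int32_t* lock)
+{
+ /* 0 = unlocked, 1 = locked; spin until we swap a 0 out. */
+ while (__bionic_swap(1, lock) != 0)
+ ;
+ __bionic_memory_barrier(); /* acquire: keep later accesses after the lock */
+}
+
+__ATOMIC_INLINE__ void
+__example_spin_unlock(volatile int32_t* lock)
+{
+ __bionic_memory_barrier(); /* release: flush prior accesses first */
+ *lock = 0;
+}
+#endif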
+
+/* Atomic increment, without any explicit barriers.
+ * This returns the old (pre-increment) value.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+ int32_t prev, tmp, status;
+ do {
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "ldrex %0, [%4]\n"
+ "add %1, %0, #1\n"
+ "strex %2, %1, [%4]"
+ __ATOMIC_SWITCH_TO_THUMB
+ : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+ : "r" (ptr)
+ : __ATOMIC_CLOBBERS "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+ int32_t prev, status;
+ do {
+ prev = *ptr;
+ status = __kernel_cmpxchg(prev, prev+1, ptr);
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+#endif
+
+/* Atomic decrement, without any explicit barriers.
+ * This returns the old (pre-decrement) value.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+ int32_t prev, tmp, status;
+ do {
+ __asm__ __volatile__ (
+ __ATOMIC_SWITCH_TO_ARM
+ "ldrex %0, [%4]\n"
+ "sub %1, %0, #1\n"
+ "strex %2, %1, [%4]"
+ __ATOMIC_SWITCH_TO_THUMB
+ : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+ : "r" (ptr)
+ : __ATOMIC_CLOBBERS "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+ int32_t prev, status;
+ do {
+ prev = *ptr;
+ status = __kernel_cmpxchg(prev, prev-1, ptr);
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+#endif
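+
+/* Usage sketch (hypothetical, not part of this header's API), kept under
+ * #if 0: since __bionic_atomic_dec() returns the old value, a reference
+ * count release can detect the final reference by checking whether the
+ * value before the decrement was 1. Like the primitive itself, this
+ * provides no memory barrier.
+ */
+#if 0
+__ATOMIC_INLINE__ int
+__example_unref(volatile int32_t* refcount)
+{
+ /* Returns 1 when the caller dropped the last reference. */
+ return __bionic_atomic_dec(refcount) == 1;
+}
+#endif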
+
+#endif /* BIONIC_ATOMIC_ARM_H */