1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BIONIC_ATOMIC_ARM_H
#define BIONIC_ATOMIC_ARM_H
#include <machine/cpu-features.h>
/* Some of the hardware instructions used below are not available in Thumb-1
* mode (they are if you build in ARM or Thumb-2 mode though). To solve this
* problem, we're going to use the same technique as libatomic_ops,
* which is to temporarily switch to ARM, do the operation, then switch
* back to Thumb-1.
*
* This results in two 'bx' jumps, just like a normal function call, but
* everything is kept inlined, avoids loading or computing the function's
* address, and prevents a little I-cache thrashing too.
*
* However, it is highly recommended to avoid compiling any C library source
* file that uses these functions in Thumb-1 mode.
*
* Define three helper macros to implement this:
*/
#if defined(__thumb__) && !defined(__thumb2__)
/* Thumb-1 build: every ARM-only instruction sequence is bracketed by an
 * explicit switch to ARM state and back. r3 is used as scratch register for
 * the two branch targets, which is why it appears in __ATOMIC_CLOBBERS.
 */
#  define __ATOMIC_SWITCH_TO_ARM \
            "adr r3, 5f\n" \
            "bx r3\n" \
            ".align\n" \
            ".arm\n" \
            "5:\n"
/* note: the leading \n below is intentional (it terminates the last
 * instruction of the ARM sequence, which is written without one).
 *
 * The return address is "6f + 1": 'bx' only switches to Thumb state when
 * bit 0 of the target address is set. The ".thumb" directive also needs its
 * own line terminator, otherwise it is glued to the "6:" label that follows.
 */
#  define __ATOMIC_SWITCH_TO_THUMB \
            "\n" \
            "adr r3, 6f + 1\n" \
            "bx r3\n" \
            ".thumb\n" \
            "6:\n"
/* List of clobbered registers. The trailing comma is required: this macro is
 * always followed by another string in the clobber list (e.g. "cc"), and
 * without the comma the adjacent literals would be concatenated into a
 * single bogus register name such as "r3cc".
 */
#  define __ATOMIC_CLOBBERS "r3",
/* Warn the user that ARM mode should really be preferred! */
#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
#else
/* ARM or Thumb-2 build: no mode switch is needed, all three helpers expand
 * to nothing.
 */
#  define __ATOMIC_SWITCH_TO_ARM /* nothing */
#  define __ATOMIC_SWITCH_TO_THUMB /* nothing */
#  define __ATOMIC_CLOBBERS /* nothing */
#endif
/* Define a full memory barrier, this is only needed if we build the
* platform for a multi-core device. For the record, using a 'dmb'
* instruction on a Nexus One device can take up to 180 ns even if
* it is completely unnecessary on this device.
*
* NOTE: This is where the platform and NDK atomic headers are
* going to diverge. With the NDK, we don't know if the generated
* code is going to run on a single or multi-core device, so we
* need to be cautious.
*
* Fortunately, we can use the kernel helper function that is
* mapped at address 0xffff0fa0 in all user processes, and that
* provides a device-specific barrier operation.
*
* I.e. on single-core devices, the helper immediately returns,
* on multi-core devices, it uses "dmb" or any other means to
* perform a full-memory barrier.
*
* There are three cases to consider for the platform:
*
* - multi-core ARMv7-A => use the 'dmb' hardware instruction
* - multi-core ARMv6 => use the coprocessor
* - single core ARMv5TE/6/7 => do not use any hardware barrier
*/
#if defined(ANDROID_SMP) && ANDROID_SMP == 1
/* Sanity check, multi-core is only supported starting from ARMv6 */
# if __ARM_ARCH__ < 6
# error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
# endif
# ifdef __ARM_HAVE_DMB
/* For ARMv7-A, we can use the 'dmb' instruction directly.
 *
 * Full memory barrier used when ANDROID_SMP == 1 and __ARM_HAVE_DMB is
 * defined. Emits a single 'dmb'; the "memory" clobber also makes the
 * statement a compiler-level barrier. Takes no argument, returns nothing.
 */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
/* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
* bother with __ATOMIC_SWITCH_TO_ARM */
__asm__ __volatile__ ( "dmb" : : : "memory" );
}
# else /* !__ARM_HAVE_DMB */
/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
* which requires the use of a general-purpose register, which is slightly
* less efficient.
*/
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
__asm__ __volatile__ (
__SWITCH_TO_ARM
"mcr p15, 0, %0, c7, c10, 5"
__SWITCH_TO_THUMB
: : "r" (0) : __ATOMIC_CLOBBERS "memory");
}
# endif /* !__ARM_HAVE_DMB */
#else /* !ANDROID_SMP */
/* Memory barrier used when ANDROID_SMP is not defined to 1.
 * No instruction is emitted: the empty asm statement with a "memory"
 * clobber only acts as a compiler barrier. Takes no argument, returns
 * nothing.
 */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
/* A simple compiler barrier */
__asm__ __volatile__ ( "" : : : "memory" );
}
#endif /* !ANDROID_SMP */
/* LDREX/STREX routines broken on ARMv6.
 *
 * When __ARM_ARCH__ is exactly 6, BROKEN_REX is defined. Each
 * "#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)" test in this
 * header is then false, so the non-LDREX/STREX implementation of every
 * routine is compiled instead.
 */
# if __ARM_ARCH__ == 6
# define BROKEN_REX
# endif
/* Compare-and-swap, without any explicit barriers. Note that this function
* returns 0 on success, and 1 on failure. The opposite convention is typically
* used on other platforms.
*
* There are two cases to consider:
*
* - __ARM_HAVE_LDREX_STREX defined and BROKEN_REX not defined
*   => use LDREX/STREX instructions
* - otherwise (this includes ARMv6, for which BROKEN_REX is defined)
*   => use kernel helper function mapped at 0xffff0fc0
*
* LDREX/STREX are only available starting from ARMv6
*/
#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
/* Compare-and-swap built on LDREX/STREX, without any explicit barrier.
 *
 * old_value: value *ptr is expected to hold
 * new_value: value to store when *ptr equals old_value
 * ptr:       word to operate on
 *
 * Returns 0 when the value read from *ptr equals old_value, 1 otherwise.
 */
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
int32_t prev, status;
do {
/* Asm operands: %0 = prev, %1 = status, %2 = "+m" (*ptr), %3 = ptr,
 * %4 = old_value, %5 = new_value.
 *
 * Sequence: exclusive-load *ptr into prev, preset status to 0, compare
 * prev with old_value and, only when they are equal, attempt the
 * exclusive store of new_value (its result is written to status).
 * The 'it eq' line is only emitted for Thumb-2 builds.
 */
__asm__ __volatile__ (
__ATOMIC_SWITCH_TO_ARM
"ldrex %0, [%3]\n"
"mov %1, #0\n"
"teq %0, %4\n"
#ifdef __thumb2__
"it eq\n"
#endif
"strexeq %1, %5, [%3]"
__ATOMIC_SWITCH_TO_THUMB
: "=&r" (prev), "=&r" (status), "+m"(*ptr)
: "r" (ptr), "Ir" (old_value), "r" (new_value)
: __ATOMIC_CLOBBERS "cc");
/* status stays 0 when the conditional store is skipped; a non-zero
 * status makes the loop retry (hinted as the unlikely case). */
} while (__builtin_expect(status != 0, 0));
return prev != old_value;
}
# else /* !__ARM_HAVE_LDREX_STREX */
/* Signature of the kernel-provided compare-and-swap helper that is mapped
 * at address 0xffff0fc0.
 */
typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);

/* Thin wrapper that calls the kernel helper at its fixed address.
 *
 * old_value, new_value and ptr are forwarded unchanged.
 * Returns whatever the helper returns; like the rest of this file, the
 * kernel function reports success with 0.
 */
__ATOMIC_INLINE__ int
__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    kernel_cmpxchg *const helper = (kernel_cmpxchg *)0xffff0fc0;

    return helper(old_value, new_value, ptr);
}
/* Compare-and-swap for builds that do not use LDREX/STREX: the whole
 * operation is delegated to __kernel_cmpxchg() and its result is returned
 * as-is.
 */
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    const int result = __kernel_cmpxchg(old_value, new_value, ptr);

    return result;
}
#endif /* !__ARM_HAVE_LDREX_STREX */
/* Swap operation, without any explicit barriers.
* There are again two similar cases to consider:
*
* ARMv6+ => use LDREX/STREX
* < ARMv6 => use SWP instead.
*/
#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
/* Atomic swap built on LDREX/STREX, without any explicit barrier.
 *
 * new_value: value to store into *ptr
 * ptr:       word to operate on
 *
 * Returns the value read from *ptr by the exclusive load of the last
 * (successful) iteration.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
int32_t prev, status;
do {
/* Asm operands: %0 = prev, %1 = status, %2 = "+m" (*ptr), %3 = ptr,
 * %4 = new_value. Exclusive-load *ptr into prev, then attempt the
 * exclusive store of new_value; the store result lands in status. */
__asm__ __volatile__ (
__ATOMIC_SWITCH_TO_ARM
"ldrex %0, [%3]\n"
"strex %1, %4, [%3]"
__ATOMIC_SWITCH_TO_THUMB
: "=&r" (prev), "=&r" (status), "+m" (*ptr)
: "r" (ptr), "r" (new_value)
: __ATOMIC_CLOBBERS "cc");
/* Retry while the exclusive store reports a non-zero status. */
} while (__builtin_expect(status != 0, 0));
return prev;
}
#else /* !__ARM_HAVE_LDREX_STREX */
/* Atomic swap for builds that do not use LDREX/STREX, implemented with the
 * SWP instruction and without any explicit barrier.
 *
 * new_value: value to store into *ptr
 * ptr:       word to operate on
 *
 * Returns the value *ptr held before the swap.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev;

    /* NOTE: SWP only exists in the ARM instruction set, so a Thumb-1 build
     * has to switch to ARM state around it, exactly like the LDREX/STREX
     * based routines of this file do. In ARM builds the switch helpers
     * expand to nothing.
     *
     * Asm operands: %0 = prev, %1 = "+m" (*ptr), %2 = new_value, %3 = ptr.
     */
    __asm__ __volatile__ (
        __ATOMIC_SWITCH_TO_ARM
        "swp %0, %2, [%3]"
        __ATOMIC_SWITCH_TO_THUMB
        : "=&r" (prev), "+m" (*ptr)
        : "r" (new_value), "r" (ptr)
        : __ATOMIC_CLOBBERS "cc");
    return prev;
}
#endif /* !__ARM_HAVE_LDREX_STREX */
/* Atomic increment - without any barriers
* This returns the old value
*/
#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
/* Atomic increment built on LDREX/STREX, without any explicit barrier.
 *
 * ptr: word to increment
 *
 * Returns the value read from *ptr before the increment.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
int32_t prev, tmp, status;
do {
/* Asm operands: %0 = prev, %1 = tmp, %2 = status, %3 = "+m" (*ptr),
 * %4 = ptr. Exclusive-load *ptr into prev, compute tmp = prev + 1, then
 * attempt the exclusive store of tmp; the store result lands in status. */
__asm__ __volatile__ (
__ATOMIC_SWITCH_TO_ARM
"ldrex %0, [%4]\n"
"add %1, %0, #1\n"
"strex %2, %1, [%4]"
__ATOMIC_SWITCH_TO_THUMB
: "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
: "r" (ptr)
: __ATOMIC_CLOBBERS "cc");
/* Retry while the exclusive store reports a non-zero status. */
} while (__builtin_expect(status != 0, 0));
return prev;
}
#else
/* Atomic increment for builds that do not use LDREX/STREX, without any
 * explicit barrier. Implemented as a compare-and-swap retry loop on top of
 * __kernel_cmpxchg().
 *
 * ptr: word to increment
 *
 * Returns the value *ptr held before the increment.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, status;

    do {
        prev = *ptr;
        /* The new value is computed in unsigned arithmetic so that
         * incrementing INT32_MAX wraps around (as the 'add' of the
         * LDREX/STREX variant does) instead of being a signed overflow,
         * which is undefined behaviour in C. The conversion back to
         * int32_t relies on the compiler's modulo-2^32 behaviour (GCC and
         * Clang document it).
         */
        status = __kernel_cmpxchg(prev, (int32_t)((unsigned int)prev + 1u), ptr);
        /* A non-zero status means the swap did not happen: reload, retry. */
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif
/* Atomic decrement - without any barriers
* This returns the old value.
*/
#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
/* Atomic decrement built on LDREX/STREX, without any explicit barrier.
 *
 * ptr: word to decrement
 *
 * Returns the value read from *ptr before the decrement.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
int32_t prev, tmp, status;
do {
/* Asm operands: %0 = prev, %1 = tmp, %2 = status, %3 = "+m" (*ptr),
 * %4 = ptr. Exclusive-load *ptr into prev, compute tmp = prev - 1, then
 * attempt the exclusive store of tmp; the store result lands in status. */
__asm__ __volatile__ (
__ATOMIC_SWITCH_TO_ARM
"ldrex %0, [%4]\n"
"sub %1, %0, #1\n"
"strex %2, %1, [%4]"
__ATOMIC_SWITCH_TO_THUMB
: "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
: "r" (ptr)
: __ATOMIC_CLOBBERS "cc");
/* Retry while the exclusive store reports a non-zero status. */
} while (__builtin_expect(status != 0, 0));
return prev;
}
#else
/* Atomic decrement for builds that do not use LDREX/STREX, without any
 * explicit barrier. Implemented as a compare-and-swap retry loop on top of
 * __kernel_cmpxchg().
 *
 * ptr: word to decrement
 *
 * Returns the value *ptr held before the decrement.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, status;

    do {
        prev = *ptr;
        /* The new value is computed in unsigned arithmetic so that
         * decrementing INT32_MIN wraps around (as the 'sub' of the
         * LDREX/STREX variant does) instead of being a signed overflow,
         * which is undefined behaviour in C. The conversion back to
         * int32_t relies on the compiler's modulo-2^32 behaviour (GCC and
         * Clang document it).
         */
        status = __kernel_cmpxchg(prev, (int32_t)((unsigned int)prev - 1u), ptr);
        /* A non-zero status means the swap did not happen: reload, retry. */
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif
#endif /* BIONIC_ATOMIC_ARM_H */
|