| -rw-r--r-- | libc/bionic/atomics_x86.c | 95 |
| -rw-r--r-- | libc/bionic/pthread.c | 65 |
| -rw-r--r-- | libc/bionic/semaphore.c | 6 |
| -rw-r--r-- | libc/include/pthread.h | 10 |
| -rw-r--r-- | libc/kernel/common/media/soc2030.h | 28 |
| -rw-r--r-- | libc/private/bionic_atomic_arm.h | 284 |
| -rw-r--r-- | libc/private/bionic_atomic_gcc_builtin.h | 62 |
| -rw-r--r-- | libc/private/bionic_atomic_inline.h | 59 |
| -rw-r--r-- | libc/private/bionic_atomic_x86.h | 88 |
| -rw-r--r-- | libc/stdio/fileext.h | 17 |
| -rw-r--r-- | libc/stdio/findfp.c | 7 |
| -rw-r--r-- | libc/stdio/flockfile.c | 163 |
| -rw-r--r-- | libstdc++/src/one_time_construction.cpp | 6 |
| -rw-r--r-- | linker/linker.c | 13 |
14 files changed, 576 insertions, 327 deletions
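The bulk of this change renames bionic's private __atomic_* helpers to __bionic_* and moves their implementations out of libc/bionic/atomics_x86.c into per-architecture private headers (bionic_atomic_arm.h, bionic_atomic_x86.h, with bionic_atomic_gcc_builtin.h as the generic fallback). One convention to keep in mind while reading the hunks below: __bionic_cmpxchg() returns 0 on success and non-zero on failure, the opposite of most compare-and-swap APIs. A minimal sketch of the retry loop the call sites rely on (hypothetical counter helper, not part of the patch):

    #include <stdint.h>

    /* Stand-in declaration for the inline helper defined in the new private
     * headers; it returns 0 if the swap succeeded. */
    extern int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr);

    static void counter_inc(volatile int32_t* counter)
    {
        int32_t old;
        do {
            old = *counter;                                      /* reload on every retry */
        } while (__bionic_cmpxchg(old, old + 1, counter) != 0);  /* 0 == success */
    }
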
diff --git a/libc/bionic/atomics_x86.c b/libc/bionic/atomics_x86.c deleted file mode 100644 index fd60f4f..0000000 --- a/libc/bionic/atomics_x86.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <sys/atomics.h> - -#define FUTEX_SYSCALL 240 -#define FUTEX_WAIT 0 -#define FUTEX_WAKE 1 - -int __futex_wait(volatile void *ftx, int val) -{ - int ret; - asm volatile ( - "int $0x80;" - : "=a" (ret) - : "0" (FUTEX_SYSCALL), - "b" (ftx), - "c" (FUTEX_WAIT), - "d" (val), - "S" (0) - ); - return ret; -} - -int __futex_wake(volatile void *ftx, int count) -{ - int ret; - asm volatile ( - "int $0x80;" - : "=a" (ret) - : "0" (FUTEX_SYSCALL), - "b" (ftx), - "c" (FUTEX_WAKE), - "d" (count) - ); - return ret; -} - -int __atomic_cmpxchg(int old, int new, volatile int* addr) { - int xchg; - asm volatile ( - "lock;" - "cmpxchg %%ecx, (%%edx);" - "setne %%al;" - : "=a" (xchg) - : "a" (old), - "c" (new), - "d" (addr) - ); - return xchg; -} - -int __atomic_swap(int new, volatile int* addr) { - int old; - asm volatile ( - "lock;" - "xchg %%ecx, (%%edx);" - : "=c" (old) - : "c" (new), - "d" (addr) - ); - return old; -} - -int __atomic_dec(volatile int* addr) { - int old; - do { - old = *addr; - } while (atomic_cmpxchg(old, old-1, addr)); - return old; -} diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c index 1da2ec9..b56822f 100644 --- a/libc/bionic/pthread.c +++ b/libc/bionic/pthread.c @@ -692,7 +692,7 @@ FoundIt: goto Exit; } } - while ( __atomic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED, + while ( __bionic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED, (volatile int*)&thread->attr.flags ) != 0 ); Exit: pthread_mutex_unlock(&gThreadListLock); @@ -926,17 +926,17 @@ _normal_lock(pthread_mutex_t* mutex) int shared = mutex->value & MUTEX_SHARED_MASK; /* * The common case is an unlocked mutex, so we begin by trying to - * change the lock's state from 0 to 1. __atomic_cmpxchg() returns 0 + * change the lock's state from 0 to 1. __bionic_cmpxchg() returns 0 * if it made the swap successfully. If the result is nonzero, this * lock is already held by another thread. 
*/ - if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) { + if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) { /* * We want to go to sleep until the mutex is available, which * requires promoting it to state 2. We need to swap in the new * state value and then wait until somebody wakes us up. * - * __atomic_swap() returns the previous value. We swap 2 in and + * __bionic_swap() returns the previous value. We swap 2 in and * see if we got zero back; if so, we have acquired the lock. If * not, another thread still holds the lock and we wait again. * @@ -947,7 +947,7 @@ _normal_lock(pthread_mutex_t* mutex) * that the mutex is in state 2 when we go to sleep on it, which * guarantees a wake-up call. */ - while (__atomic_swap(shared|2, &mutex->value ) != (shared|0)) + while (__bionic_swap(shared|2, &mutex->value ) != (shared|0)) __futex_wait_ex(&mutex->value, shared, shared|2, 0); } ANDROID_MEMBAR_FULL(); @@ -967,10 +967,10 @@ _normal_unlock(pthread_mutex_t* mutex) /* * The mutex state will be 1 or (rarely) 2. We use an atomic decrement - * to release the lock. __atomic_dec() returns the previous value; + * to release the lock. __bionic_atomic_dec() returns the previous value; * if it wasn't 1 we have to do some additional work. */ - if (__atomic_dec(&mutex->value) != (shared|1)) { + if (__bionic_atomic_dec(&mutex->value) != (shared|1)) { /* * Start by releasing the lock. The decrement changed it from * "contended lock" to "uncontended lock", which means we still @@ -1158,7 +1158,7 @@ int pthread_mutex_trylock(pthread_mutex_t *mutex) /* Handle common case first */ if ( __likely(mtype == MUTEX_TYPE_NORMAL) ) { - if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) { + if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) { ANDROID_MEMBAR_FULL(); return 0; } @@ -1256,13 +1256,13 @@ int pthread_mutex_lock_timeout_np(pthread_mutex_t *mutex, unsigned msecs) if ( __likely(mtype == MUTEX_TYPE_NORMAL) ) { /* fast path for uncontended lock */ - if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) { + if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) { ANDROID_MEMBAR_FULL(); return 0; } /* loop while needed */ - while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) { + while (__bionic_swap(shared|2, &mutex->value) != (shared|0)) { if (__timespec_to_absolute(&ts, &abstime, clock) < 0) return EBUSY; @@ -1431,7 +1431,7 @@ __pthread_cond_pulse(pthread_cond_t *cond, int counter) long oldval = cond->value; long newval = ((oldval - COND_COUNTER_INCREMENT) & COND_COUNTER_MASK) | flags; - if (__atomic_cmpxchg(oldval, newval, &cond->value) == 0) + if (__bionic_cmpxchg(oldval, newval, &cond->value) == 0) break; } @@ -1856,7 +1856,21 @@ int pthread_kill(pthread_t tid, int sig) return ret; } -extern int __rt_sigprocmask(int, const sigset_t *, sigset_t *, size_t); +/* Despite the fact that our kernel headers define sigset_t explicitly + * as a 32-bit integer, the kernel system call really expects a 64-bit + * bitmap for the signal set, or more exactly an array of two-32-bit + * values (see $KERNEL/arch/$ARCH/include/asm/signal.h for details). + * + * Unfortunately, we cannot fix the sigset_t definition without breaking + * the C library ABI, so perform a little runtime translation here. 
+ */ +typedef union { + sigset_t bionic; + uint32_t kernel[2]; +} kernel_sigset_t; + +/* this is a private syscall stub */ +extern int __rt_sigprocmask(int, const kernel_sigset_t *, kernel_sigset_t *, size_t); int pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) { @@ -1865,16 +1879,31 @@ int pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) */ int ret, old_errno = errno; - /* Use NSIG which corresponds to the number of signals in - * our 32-bit sigset_t implementation. As such, this function, or - * anything that deals with sigset_t cannot manage real-time signals - * (signo >= 32). We might want to introduce sigset_rt_t as an - * extension to do so in the future. + /* We must convert *set into a kernel_sigset_t */ + kernel_sigset_t in_set, *in_set_ptr; + kernel_sigset_t out_set; + + in_set.kernel[0] = in_set.kernel[1] = 0; + out_set.kernel[0] = out_set.kernel[1] = 0; + + /* 'in_set_ptr' is the second parameter to __rt_sigprocmask. It must be NULL + * if 'set' is NULL to ensure correct semantics (which in this case would + * be to ignore 'how' and return the current signal set into 'oset'. */ - ret = __rt_sigprocmask(how, set, oset, NSIG / 8); + if (set == NULL) { + in_set_ptr = NULL; + } else { + in_set.bionic = *set; + in_set_ptr = &in_set; + } + + ret = __rt_sigprocmask(how, in_set_ptr, &out_set, sizeof(kernel_sigset_t)); if (ret < 0) ret = errno; + if (oset) + *oset = out_set.bionic; + errno = old_errno; return ret; } diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c index 96819ae..9bc8412 100644 --- a/libc/bionic/semaphore.c +++ b/libc/bionic/semaphore.c @@ -174,7 +174,7 @@ __sem_dec(volatile unsigned int *pvalue) new = SEMCOUNT_DECREMENT(old); } - while (__atomic_cmpxchg((int)(old|shared), + while (__bionic_cmpxchg((int)(old|shared), (int)(new|shared), (volatile int *)pvalue) != 0); return ret; @@ -198,7 +198,7 @@ __sem_trydec(volatile unsigned int *pvalue) new = SEMCOUNT_DECREMENT(old); } - while (__atomic_cmpxchg((int)(old|shared), + while (__bionic_cmpxchg((int)(old|shared), (int)(new|shared), (volatile int *)pvalue) != 0); @@ -235,7 +235,7 @@ __sem_inc(volatile unsigned int *pvalue) else new = SEMCOUNT_INCREMENT(old); } - while ( __atomic_cmpxchg((int)(old|shared), + while ( __bionic_cmpxchg((int)(old|shared), (int)(new|shared), (volatile int*)pvalue) != 0); diff --git a/libc/include/pthread.h b/libc/include/pthread.h index 9d05769..2015ac0 100644 --- a/libc/include/pthread.h +++ b/libc/include/pthread.h @@ -42,9 +42,13 @@ typedef struct int volatile value; } pthread_mutex_t; -#define PTHREAD_MUTEX_INITIALIZER {0} -#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER {0x4000} -#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER {0x8000} +#define __PTHREAD_MUTEX_INIT_VALUE 0 +#define __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE 0x4000 +#define __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE 0x8000 + +#define PTHREAD_MUTEX_INITIALIZER {__PTHREAD_MUTEX_INIT_VALUE} +#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER {__PTHREAD_RECURSIVE_MUTEX_INIT_VALUE} +#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER {__PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE} enum { PTHREAD_MUTEX_NORMAL = 0, diff --git a/libc/kernel/common/media/soc2030.h b/libc/kernel/common/media/soc2030.h index ad0ddfc..850ab13 100644 --- a/libc/kernel/common/media/soc2030.h +++ b/libc/kernel/common/media/soc2030.h @@ -12,7 +12,7 @@ #ifndef __SOC2030_H__ #define __SOC2030_H__ -#include <linux/ioctl.h> +#include <linux/ioctl.h> #define SOC2030_IOCTL_SET_MODE _IOWR('o', 1, struct soc2030_mode) #define SOC2030_IOCTL_GET_STATUS 
_IOC(_IOC_READ, 'o', 2, 10) @@ -22,10 +22,11 @@ #define SOC2030_IOCTL_SET_EFFECT _IOWR('o', 6, unsigned int) #define SOC2030_IOCTL_SET_WHITEBALANCE _IOWR('o', 7, unsigned int) #define SOC2030_IOCTL_SET_EXP_COMP _IOWR('o', 8, int) +#define SOC2030_IOCTL_SET_LOCK _IOWR('o', 9, struct soc2030_lock) #define SOC2030_POLL_WAITMS 50 #define SOC2030_MAX_RETRIES 3 -#define SOC2030_POLL_RETRIES 5 +#define SOC2030_POLL_RETRIES 7 #define SOC2030_MAX_PRIVATE_SIZE 1024 #define SOC2030_MAX_NUM_MODES 6 @@ -45,8 +46,21 @@ enum { WRITE_VAR_DATA, POLL_VAR_DATA, DELAY_MS, + WRITE_REG_VAR1, + WRITE_REG_VAR2, + WRITE_REG_VAR3, + WRITE_REG_VAR4, + READ_REG_VAR1, + READ_REG_VAR2, + READ_REG_VAR3, + READ_REG_VAR4, }; +#define REG_VAR1 (READ_REG_VAR1 - READ_REG_VAR1) +#define REG_VAR2 (READ_REG_VAR2 - READ_REG_VAR1) +#define REG_VAR3 (READ_REG_VAR3 - READ_REG_VAR1) +#define REG_VAR4 (READ_REG_VAR4 - READ_REG_VAR1) + enum { EFFECT_NONE, EFFECT_BW, @@ -74,6 +88,14 @@ struct soc2030_regs { __u16 val; }; +struct soc2030_lock { + __u8 aelock; + __u8 aerelock; + __u8 awblock; + __u8 awbrelock; + __u8 previewactive; +}; + struct soc2030_mode { int xres; int yres; @@ -82,5 +104,3 @@ struct soc2030_mode { }; #endif - - diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h new file mode 100644 index 0000000..275c1c9 --- /dev/null +++ b/libc/private/bionic_atomic_arm.h @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef BIONIC_ATOMIC_ARM_H +#define BIONIC_ATOMIC_ARM_H + +#include <machine/cpu-features.h> + +/* Some of the harware instructions used below are not available in Thumb-1 + * mode (they are if you build in ARM or Thumb-2 mode though). To solve this + * problem, we're going to use the same technique than libatomics_ops, + * which is to temporarily switch to ARM, do the operation, then switch + * back to Thumb-1. + * + * This results in two 'bx' jumps, just like a normal function call, but + * everything is kept inlined, avoids loading or computing the function's + * address, and prevents a little I-cache trashing too. + * + * However, it is highly recommended to avoid compiling any C library source + * file that use these functions in Thumb-1 mode. + * + * Define three helper macros to implement this: + */ +#if defined(__thumb__) && !defined(__thumb2__) +# define __ATOMIC_SWITCH_TO_ARM \ + "adr r3, 5f\n" \ + "bx r3\n" \ + ".align\n" \ + ".arm\n" \ + "5:\n" +/* note: the leading \n below is intentional */ +# define __ATOMIC_SWITCH_TO_THUMB \ + "\n" \ + "adr r3, 6f\n" \ + "bx r3\n" \ + ".thumb" \ + "6:\n" + +# define __ATOMIC_CLOBBERS "r3" /* list of clobbered registers */ + +/* Warn the user that ARM mode should really be preferred! */ +# warning Rebuilding this source file in ARM mode is highly recommended for performance!! 
+ +#else +# define __ATOMIC_SWITCH_TO_ARM /* nothing */ +# define __ATOMIC_SWITCH_TO_THUMB /* nothing */ +# define __ATOMIC_CLOBBERS /* nothing */ +#endif + + +/* Define a full memory barrier, this is only needed if we build the + * platform for a multi-core device. For the record, using a 'dmb' + * instruction on a Nexus One device can take up to 180 ns even if + * it is completely un-necessary on this device. + * + * NOTE: This is where the platform and NDK headers atomic headers are + * going to diverge. With the NDK, we don't know if the generated + * code is going to run on a single or multi-core device, so we + * need to be cautious. + * + * Fortunately, we can use the kernel helper function that is + * mapped at address 0xffff0fa0 in all user process, and that + * provides a device-specific barrier operation. + * + * I.e. on single-core devices, the helper immediately returns, + * on multi-core devices, it uses "dmb" or any other means to + * perform a full-memory barrier. + * + * There are three cases to consider for the platform: + * + * - multi-core ARMv7-A => use the 'dmb' hardware instruction + * - multi-core ARMv6 => use the coprocessor + * - single core ARMv5TE/6/7 => do not use any hardware barrier + */ +#if defined(ANDROID_SMP) && ANDROID_SMP == 1 + +/* Sanity check, multi-core is only supported starting from ARMv6 */ +# if __ARM_ARCH__ < 6 +# error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6 +# endif + +# ifdef __ARM_HAVE_DMB +/* For ARMv7-A, we can use the 'dmb' instruction directly */ +__ATOMIC_INLINE__ void +__bionic_memory_barrier(void) +{ + /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't + * bother with __ATOMIC_SWITCH_TO_ARM */ + __asm__ __volatile__ ( "dmb" : : : "memory" ); +} +# else /* !__ARM_HAVE_DMB */ +/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor, + * which requires the use of a general-purpose register, which is slightly + * less efficient. + */ +__ATOMIC_INLINE__ void +__bionic_memory_barrier(void) +{ + __asm__ __volatile__ ( + __SWITCH_TO_ARM + "mcr p15, 0, %0, c7, c10, 5" + __SWITCH_TO_THUMB + : : "r" (0) : __ATOMIC_CLOBBERS "memory"); +} +# endif /* !__ARM_HAVE_DMB */ +#else /* !ANDROID_SMP */ +__ATOMIC_INLINE__ void +__bionic_memory_barrier(void) +{ + /* A simple compiler barrier */ + __asm__ __volatile__ ( "" : : : "memory" ); +} +#endif /* !ANDROID_SMP */ + +/* Compare-and-swap, without any explicit barriers. Note that this functions + * returns 0 on success, and 1 on failure. The opposite convention is typically + * used on other platforms. 
+ * + * There are two cases to consider: + * + * - ARMv6+ => use LDREX/STREX instructions + * - < ARMv6 => use kernel helper function mapped at 0xffff0fc0 + * + * LDREX/STREX are only available starting from ARMv6 + */ +#ifdef __ARM_HAVE_LDREX_STREX +__ATOMIC_INLINE__ int +__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) +{ + int32_t prev, status; + do { + __asm__ __volatile__ ( + __ATOMIC_SWITCH_TO_ARM + "ldrex %0, [%3]\n" + "mov %1, #0\n" + "teq %0, %4\n" +#ifdef __thumb2__ + "it eq\n" +#endif + "strexeq %1, %5, [%3]" + __ATOMIC_SWITCH_TO_THUMB + : "=&r" (prev), "=&r" (status), "+m"(*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : __ATOMIC_CLOBBERS "cc"); + } while (__builtin_expect(status != 0, 0)); + return prev != old_value; +} +# else /* !__ARM_HAVE_LDREX_STREX */ + +/* Use the handy kernel helper function mapped at 0xffff0fc0 */ +typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *); + +__ATOMIC_INLINE__ int +__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) +{ + /* Note: the kernel function returns 0 on success too */ + return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr); +} + +__ATOMIC_INLINE__ int +__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) +{ + return __kernel_cmpxchg(old_value, new_value, ptr); +} +#endif /* !__ARM_HAVE_LDREX_STREX */ + +/* Swap operation, without any explicit barriers. + * There are again two similar cases to consider: + * + * ARMv6+ => use LDREX/STREX + * < ARMv6 => use SWP instead. + */ +#ifdef __ARM_HAVE_LDREX_STREX +__ATOMIC_INLINE__ int32_t +__bionic_swap(int32_t new_value, volatile int32_t* ptr) +{ + int32_t prev, status; + do { + __asm__ __volatile__ ( + __ATOMIC_SWITCH_TO_ARM + "ldrex %0, [%3]\n" + "strex %1, %4, [%3]" + __ATOMIC_SWITCH_TO_THUMB + : "=&r" (prev), "=&r" (status), "+m" (*ptr) + : "r" (ptr), "r" (new_value) + : __ATOMIC_CLOBBERS "cc"); + } while (__builtin_expect(status != 0, 0)); + return prev; +} +#else /* !__ARM_HAVE_LDREX_STREX */ +__ATOMIC_INLINE__ int32_t +__bionic_swap(int32_t new_value, volatile int32_t* ptr) +{ + int32_t prev; + /* NOTE: SWP is available in Thumb-1 too */ + __asm__ __volatile__ ("swp %0, %2, [%3]" + : "=&r" (prev), "+m" (*ptr) + : "r" (new_value), "r" (ptr) + : "cc"); + return prev; +} +#endif /* !__ARM_HAVE_LDREX_STREX */ + +/* Atomic increment - without any barriers + * This returns the old value + */ +#ifdef __ARM_HAVE_LDREX_STREX +__ATOMIC_INLINE__ int32_t +__bionic_atomic_inc(volatile int32_t* ptr) +{ + int32_t prev, tmp, status; + do { + __asm__ __volatile__ ( + __ATOMIC_SWITCH_TO_ARM + "ldrex %0, [%4]\n" + "add %1, %0, #1\n" + "strex %2, %1, [%4]" + __ATOMIC_SWITCH_TO_THUMB + : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr) + : "r" (ptr) + : __ATOMIC_CLOBBERS "cc"); + } while (__builtin_expect(status != 0, 0)); + return prev; +} +#else +__ATOMIC_INLINE__ int32_t +__bionic_atomic_inc(volatile int32_t* ptr) +{ + int32_t prev, status; + do { + prev = *ptr; + status = __kernel_cmpxchg(prev, prev+1, ptr); + } while (__builtin_expect(status != 0, 0)); + return prev; +} +#endif + +/* Atomic decrement - without any barriers + * This returns the old value. 
+ */ +#ifdef __ARM_HAVE_LDREX_STREX +__ATOMIC_INLINE__ int32_t +__bionic_atomic_dec(volatile int32_t* ptr) +{ + int32_t prev, tmp, status; + do { + __asm__ __volatile__ ( + __ATOMIC_SWITCH_TO_ARM + "ldrex %0, [%4]\n" + "sub %1, %0, #1\n" + "strex %2, %1, [%4]" + __ATOMIC_SWITCH_TO_THUMB + : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr) + : "r" (ptr) + : __ATOMIC_CLOBBERS "cc"); + } while (__builtin_expect(status != 0, 0)); + return prev; +} +#else +__ATOMIC_INLINE__ int32_t +__bionic_atomic_dec(volatile int32_t* ptr) +{ + int32_t prev, status; + do { + prev = *ptr; + status = __kernel_cmpxchg(prev, prev-1, ptr); + } while (__builtin_expect(status != 0, 0)); + return prev; +} +#endif + +#endif /* SYS_ATOMICS_ARM_H */ diff --git a/libc/private/bionic_atomic_gcc_builtin.h b/libc/private/bionic_atomic_gcc_builtin.h new file mode 100644 index 0000000..e7c5761 --- /dev/null +++ b/libc/private/bionic_atomic_gcc_builtin.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef BIONIC_ATOMIC_GCC_BUILTIN_H +#define BIONIC_ATOMIC_GCC_BUILTIN_H + +/* This header file is used by default if we don't have optimized atomic + * routines for a given platform. See bionic_atomic_arm.h and + * bionic_atomic_x86.h for examples. + */ + +__ATOMIC_INLINE__ void +__bionic_memory_barrier(void) +{ + __sync_synchronize(); +} + +__ATOMIC_INLINE__ int +__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) +{ + /* We must return 0 on success */ + return __sync_bool_compare_and_swap(ptr, old_value, new_value) == 0; +} + +__ATOMIC_INLINE__ int32_t +__bionic_swap(int32_t new_value, volatile int32_t* ptr) +{ + int32_t prev; + do { + prev = *ptr; + status = __sync_val_compare_and_swap(ptr, prev, new_value); + } while (status == 0); + return prev; +} + +__ATOMIC_INLINE__ int32_t +__bionic_atomic_inc(volatile int32_t* ptr) +{ + /* We must return the old value */ + return __sync_fetch_and_add(ptr, 1); +} + +__ATOMIC_INLINE__ int32_t +__bionic_atomic_dec(volatile int32_t* ptr) +{ + /* We must return the old value */ + return __sync_fetch_and_add(ptr, -1); +} + +#endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */ diff --git a/libc/private/bionic_atomic_inline.h b/libc/private/bionic_atomic_inline.h index 95766e1..821ad39 100644 --- a/libc/private/bionic_atomic_inline.h +++ b/libc/private/bionic_atomic_inline.h @@ -43,62 +43,21 @@ extern "C" { #endif -/* - * Define the full memory barrier for an SMP system. This is - * platform-specific. +/* Define __ATOMIC_INLINE__ to control the inlining of all atomics + * functions declared here. For a slight performance boost, we want + * all of them to be always_inline */ +#define __ATOMIC_INLINE__ static __inline__ __attribute__((always_inline)) #ifdef __arm__ -#include <machine/cpu-features.h> - -/* - * For ARMv6K we need to issue a specific MCR instead of the DMB, since - * that wasn't added until v7. For anything older, SMP isn't relevant. 
- * Since we don't have an ARMv6K to test with, we're not going to deal - * with that now. - * - * The DMB instruction is found in the ARM and Thumb2 instruction sets. - * This will fail on plain 16-bit Thumb. - */ -#if defined(__ARM_HAVE_DMB) -# define _ANDROID_MEMBAR_FULL_SMP() \ - do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0) +# include <bionic_atomic_arm.h> +#elif defined(__i386__) +# include <bionic_atomic_x86.h> #else -# define _ANDROID_MEMBAR_FULL_SMP() ARM_SMP_defined_but_no_DMB() +# include <bionic_atomic_gcc_builtin.h> #endif -#elif defined(__i386__) || defined(__x86_64__) -/* - * For recent x86, we can use the SSE2 mfence instruction. - */ -# define _ANDROID_MEMBAR_FULL_SMP() \ - do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0) - -#else -/* - * Implementation not defined for this platform. Hopefully we're building - * in uniprocessor mode. - */ -# define _ANDROID_MEMBAR_FULL_SMP() SMP_barrier_not_defined_for_platform() -#endif - - -/* - * Full barrier. On uniprocessors this is just a compiler reorder barrier, - * which ensures that the statements appearing above the barrier in the C/C++ - * code will be issued after the statements appearing below the barrier. - * - * For SMP this also includes a memory barrier instruction. On an ARM - * CPU this means that the current core will flush pending writes, wait - * for pending reads to complete, and discard any cached reads that could - * be stale. Other CPUs may do less, but the end result is equivalent. - */ -#if ANDROID_SMP != 0 -# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP() -#else -# define ANDROID_MEMBAR_FULL() \ - do { __asm__ __volatile__ ("" ::: "memory"); } while (0) -#endif +#define ANDROID_MEMBAR_FULL __bionic_memory_barrier #ifdef __cplusplus } // extern "C" diff --git a/libc/private/bionic_atomic_x86.h b/libc/private/bionic_atomic_x86.h new file mode 100644 index 0000000..aca0c4b --- /dev/null +++ b/libc/private/bionic_atomic_x86.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef BIONIC_ATOMIC_X86_H +#define BIONIC_ATOMIC_X86_H + +/* Define a full memory barrier, this is only needed if we build the + * platform for a multi-core device. + */ +#if defined(ANDROID_SMP) && ANDROID_SMP == 1 +__ATOMIC_INLINE__ void +__bionic_memory_barrier() +{ + __asm__ __volatile__ ( "mfence" : : : "memory" ); +} +#else +__ATOMIC_INLINE__ void +__bionic_memory_barrier() +{ + /* A simple compiler barrier */ + __asm__ __volatile__ ( "" : : : "memory" ); +} +#endif + +/* Compare-and-swap, without any explicit barriers. Note that this function + * returns 0 on success, and 1 on failure. The opposite convention is typically + * used on other platforms. 
+ */ +__ATOMIC_INLINE__ int +__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) +{ + int32_t prev; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (prev) + : "q" (new_value), "m" (*ptr), "0" (old_value) + : "memory"); + return prev != old_value; +} + + +/* Swap, without any explicit barriers */ +__ATOMIC_INLINE__ int32_t +__bionic_swap(int32_t new_value, volatile int32_t *ptr) +{ + __asm__ __volatile__ ("xchgl %1, %0" + : "=r" (new_value) + : "m" (*ptr), "0" (new_value) + : "memory"); + return new_value; +} + +/* Atomic increment, without explicit barriers */ +__ATOMIC_INLINE__ int32_t +__bionic_atomic_inc(volatile int32_t *ptr) +{ + int increment = 1; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "+r" (increment), "+m" (*ptr) + : : "memory"); + /* increment now holds the old value of *ptr */ + return increment; +} + +/* Atomic decrement, without explicit barriers */ +__ATOMIC_INLINE__ int32_t +__bionic_atomic_dec(volatile int32_t *ptr) +{ + int increment = -1; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "+r" (increment), "+m" (*ptr) + : : "memory"); + /* increment now holds the old value of *ptr */ + return increment; +} + +#endif /* BIONIC_ATOMIC_X86_H */ diff --git a/libc/stdio/fileext.h b/libc/stdio/fileext.h index 2d07043..b36a448 100644 --- a/libc/stdio/fileext.h +++ b/libc/stdio/fileext.h @@ -29,24 +29,41 @@ * $Citrus$ */ +#include <pthread.h> +#include "wcio.h" + /* * file extension */ struct __sfileext { struct __sbuf _ub; /* ungetc buffer */ struct wchar_io_data _wcio; /* wide char io status */ + pthread_mutex_t _lock; /* file lock */ }; +#define _FILEEXT_INITIALIZER {{NULL,0},{0},PTHREAD_RECURSIVE_MUTEX_INITIALIZER} + #define _EXT(fp) ((struct __sfileext *)((fp)->_ext._base)) #define _UB(fp) _EXT(fp)->_ub +#define _FLOCK(fp) _EXT(fp)->_lock #define _FILEEXT_INIT(fp) \ do { \ _UB(fp)._base = NULL; \ _UB(fp)._size = 0; \ WCIO_INIT(fp); \ + _FLOCK_INIT(fp); \ } while (0) +/* Helper macros to avoid a function call when you know that fp is not NULL. + * Notice that we keep _FLOCK_INIT() fast by slightly breaking our pthread + * encapsulation. + */ +#define _FLOCK_INIT(fp) _FLOCK(fp).value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE +#define _FLOCK_LOCK(fp) pthread_mutex_lock(&_FLOCK(fp)) +#define _FLOCK_TRYLOCK(fp) pthread_mutex_trylock(&_FLOCK(fp)) +#define _FLOCK_UNLOCK(fp) pthread_mutex_unlock(&_FLOCK(fp)) + #define _FILEEXT_SETUP(f, fext) \ do { \ (f)->_ext._base = (unsigned char *)(fext); \ diff --git a/libc/stdio/findfp.c b/libc/stdio/findfp.c index a659c87..76ed5ee 100644 --- a/libc/stdio/findfp.c +++ b/libc/stdio/findfp.c @@ -58,7 +58,12 @@ static struct glue uglue = { 0, FOPEN_MAX - 3, usual }; static struct glue *lastglue = &uglue; _THREAD_PRIVATE_MUTEX(__sfp_mutex); -static struct __sfileext __sFext[3]; +static struct __sfileext __sFext[3] = { + _FILEEXT_INITIALIZER, + _FILEEXT_INITIALIZER, + _FILEEXT_INITIALIZER, +}; + FILE __sF[3] = { std(__SRD, STDIN_FILENO), /* stdin */ std(__SWR, STDOUT_FILENO), /* stdout */ diff --git a/libc/stdio/flockfile.c b/libc/stdio/flockfile.c index e8c74c5..368fb15 100644 --- a/libc/stdio/flockfile.c +++ b/libc/stdio/flockfile.c @@ -31,122 +31,23 @@ * we can't use the OpenBSD implementation which uses kernel-specific * APIs not available on Linux. * - * Ideally, this would be trivially implemented by adding a - * pthread_mutex_t field to struct __sFILE as defined in - * <stdio.h>. 
- * - * However, since we don't want to bring pthread into the mix - * as well as change the size of a public API/ABI structure, - * we're going to store the data out-of-band. - * - * we use a hash-table to map FILE* pointers to recursive mutexes - * fclose() will call __fremovelock() defined below to remove - * a pointer from the table. + * Instead, we use a pthread_mutex_t within the FILE* internal state. + * See fileext.h for details. * * the behaviour, if fclose() is called while the corresponding * file is locked is totally undefined. */ #include <stdio.h> -#include <pthread.h> #include <string.h> +#include <errno.h> +#include "fileext.h" -/* a node in the hash table */ -typedef struct FileLock { - struct FileLock* next; - FILE* file; - pthread_mutex_t mutex; -} FileLock; - -/* use a static hash table. We assume that we're not going to - * lock a really large number of FILE* objects on an embedded - * system. - */ -#define FILE_LOCK_BUCKETS 32 - -typedef struct { - pthread_mutex_t lock; - FileLock* buckets[ FILE_LOCK_BUCKETS ]; -} LockTable; - -static LockTable* _lockTable; -static pthread_once_t _lockTable_once = PTHREAD_ONCE_INIT; - -static void -lock_table_init( void ) -{ - _lockTable = malloc(sizeof(*_lockTable)); - if (_lockTable != NULL) { - pthread_mutex_init(&_lockTable->lock, NULL); - memset(_lockTable->buckets, 0, sizeof(_lockTable->buckets)); - } -} - -static LockTable* -lock_table_lock( void ) -{ - pthread_once( &_lockTable_once, lock_table_init ); - pthread_mutex_lock( &_lockTable->lock ); - return _lockTable; -} - -static void -lock_table_unlock( LockTable* t ) -{ - pthread_mutex_unlock( &t->lock ); -} - -static FileLock** -lock_table_lookup( LockTable* t, FILE* f ) -{ - uint32_t hash = (uint32_t)(void*)f; - FileLock** pnode; - - hash = (hash >> 2) ^ (hash << 17); - pnode = &t->buckets[hash % FILE_LOCK_BUCKETS]; - for (;;) { - FileLock* node = *pnode; - if (node == NULL || node->file == f) - break; - pnode = &node->next; - } - return pnode; -} void flockfile(FILE * fp) { - LockTable* t = lock_table_lock(); - - if (t != NULL) { - FileLock** lookup = lock_table_lookup(t, fp); - FileLock* lock = *lookup; - - if (lock == NULL) { - pthread_mutexattr_t attr; - - /* create a new node in the hash table */ - lock = malloc(sizeof(*lock)); - if (lock == NULL) { - lock_table_unlock(t); - return; - } - lock->next = NULL; - lock->file = fp; - - pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - pthread_mutex_init( &lock->mutex, &attr ); - - *lookup = lock; - } - lock_table_unlock(t); - - /* we assume that another thread didn't destroy 'lock' - * by calling fclose() on the FILE*. This can happen if - * the client is *really* buggy, but we don't care about - * such code here. - */ - pthread_mutex_lock(&lock->mutex); + if (fp != NULL) { + _FLOCK_LOCK(fp); } } @@ -154,21 +55,13 @@ flockfile(FILE * fp) int ftrylockfile(FILE *fp) { - int ret = -1; - LockTable* t = lock_table_lock(); - - if (t != NULL) { - FileLock** lookup = lock_table_lookup(t, fp); - FileLock* lock = *lookup; + /* The specification for ftrylockfile() says it returns 0 on success, + * or non-zero on error. So return an errno code directly on error. 
+ */ + int ret = EINVAL; - lock_table_unlock(t); - - /* see above comment about why we assume that 'lock' can - * be accessed from here - */ - if (lock != NULL && !pthread_mutex_trylock(&lock->mutex)) { - ret = 0; /* signal success */ - } + if (fp != NULL) { + ret = _FLOCK_TRYLOCK(fp); } return ret; } @@ -176,35 +69,7 @@ ftrylockfile(FILE *fp) void funlockfile(FILE * fp) { - LockTable* t = lock_table_lock(); - - if (t != NULL) { - FileLock** lookup = lock_table_lookup(t, fp); - FileLock* lock = *lookup; - - if (lock != NULL) - pthread_mutex_unlock(&lock->mutex); - - lock_table_unlock(t); - } -} - - -/* called from fclose() to remove the file lock */ -__LIBC_HIDDEN__ void -__fremovelock(FILE* fp) -{ - LockTable* t = lock_table_lock(); - - if (t != NULL) { - FileLock** lookup = lock_table_lookup(t, fp); - FileLock* lock = *lookup; - - if (lock != NULL) { - *lookup = lock->next; - lock->file = NULL; - } - lock_table_unlock(t); - free(lock); + if (fp != NULL) { + _FLOCK_UNLOCK(fp); } } diff --git a/libstdc++/src/one_time_construction.cpp b/libstdc++/src/one_time_construction.cpp index 2a44c79..f3d7138 100644 --- a/libstdc++/src/one_time_construction.cpp +++ b/libstdc++/src/one_time_construction.cpp @@ -20,11 +20,11 @@ extern "C" int __cxa_guard_acquire(int volatile * gv) // 6 untouched, wait and return 0 // 1 untouched, return 0 retry: - if (__atomic_cmpxchg(0, 0x2, gv) == 0) { + if (__bionic_cmpxchg(0, 0x2, gv) == 0) { ANDROID_MEMBAR_FULL(); return 1; } - __atomic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter + __bionic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter __futex_wait(gv, 0x6, NULL); if(*gv != 1) // __cxa_guard_abort was called, let every thread try since there is no return code for this condition @@ -39,7 +39,7 @@ extern "C" void __cxa_guard_release(int volatile * gv) // 2 -> 1 // 6 -> 1, and wake ANDROID_MEMBAR_FULL(); - if (__atomic_cmpxchg(0x2, 0x1, gv) == 0) { + if (__bionic_cmpxchg(0x2, 0x1, gv) == 0) { return; } diff --git a/linker/linker.c b/linker/linker.c index 51f28c8..c560507 100644 --- a/linker/linker.c +++ b/linker/linker.c @@ -2220,7 +2220,18 @@ static unsigned __linker_init_post_relocation(unsigned **elfdata) vecs += 2; } - si->base = (Elf32_Addr) si->phdr - si->phdr->p_vaddr; + /* Compute the value of si->base. We can't rely on the fact that + * the first entry is the PHDR because this will not be true + * for certain executables (e.g. some in the NDK unit test suite) + */ + int nn; + si->base = 0; + for ( nn = 0; nn < si->phnum; nn++ ) { + if (si->phdr[nn].p_type == PT_PHDR) { + si->base = (Elf32_Addr) si->phdr - si->phdr[nn].p_vaddr; + break; + } + } si->dynamic = (unsigned *)-1; si->wrprotect_start = 0xffffffff; si->wrprotect_end = 0; |
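In pthread.c, pthread_sigmask() no longer passes the caller's sigset_t straight to the kernel: the kernel expects a 64-bit signal mask (two 32-bit words) while bionic's public sigset_t is only 32 bits, so the patch widens the set through a union before the syscall and truncates the result on the way back. A self-contained sketch of that conversion, assuming bionic's 32-bit sigset_t; rt_sigprocmask_stub is a placeholder name for the private __rt_sigprocmask syscall stub:

    #include <signal.h>
    #include <stdint.h>
    #include <string.h>

    typedef union {
        sigset_t bionic;      /* 32-bit public type */
        uint32_t kernel[2];   /* what the kernel really reads */
    } kernel_sigset_t;

    /* Placeholder for the private __rt_sigprocmask syscall stub. */
    extern int rt_sigprocmask_stub(int how, const kernel_sigset_t* set,
                                   kernel_sigset_t* oset, size_t size);

    int sigmask_sketch(int how, const sigset_t* set, sigset_t* oset)
    {
        kernel_sigset_t in, out;
        memset(&in, 0, sizeof(in));    /* high word is always clear */
        memset(&out, 0, sizeof(out));
        if (set != NULL)
            in.bionic = *set;
        /* Pass NULL when 'set' is NULL so the kernel only reports the current mask. */
        int ret = rt_sigprocmask_stub(how, set ? &in : NULL, &out, sizeof(out));
        if (ret == 0 && oset != NULL)
            *oset = out.bionic;        /* drop the high word the caller cannot express */
        return ret;
    }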
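For architectures without a hand-written atomic header, the new bionic_atomic_gcc_builtin.h falls back on GCC's __sync builtins. The correspondence is easy to state on its own (a sketch of that mapping under bionic's return conventions, assuming a GCC-compatible compiler; not a copy of the header):

    #include <stdint.h>

    /* Full memory barrier. */
    static inline void barrier_sketch(void)
    {
        __sync_synchronize();
    }

    /* Compare-and-swap with bionic's convention: 0 on success, 1 on failure. */
    static inline int cmpxchg_sketch(int32_t old_value, int32_t new_value,
                                     volatile int32_t* ptr)
    {
        return __sync_bool_compare_and_swap(ptr, old_value, new_value) ? 0 : 1;
    }

    /* Atomic add/subtract of 1, each returning the previous value. */
    static inline int32_t inc_sketch(volatile int32_t* ptr) { return __sync_fetch_and_add(ptr, 1); }
    static inline int32_t dec_sketch(volatile int32_t* ptr) { return __sync_fetch_and_add(ptr, -1); }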
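On the stdio side, flockfile()/ftrylockfile()/funlockfile() now delegate to a recursive pthread_mutex_t stored in the per-FILE __sfileext structure (the _FLOCK_* macros in fileext.h) instead of the old out-of-band hash table. Caller-visible behaviour is unchanged; the usual POSIX idiom still applies, for example:

    #include <stdio.h>

    /* Hold the per-FILE lock across several writes so other threads
     * cannot interleave their output with ours. */
    static void write_record(FILE* fp, const char* key, int value)
    {
        flockfile(fp);                 /* recursive, so nested stdio calls are fine */
        fprintf(fp, "%s=%d\n", key, value);
        funlockfile(fp);
    }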
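Finally, the linker no longer assumes that the first program header entry is the PHDR itself when computing the executable's load base; it scans the table for PT_PHDR. The idea in isolation (a sketch using the standard ELF types rather than the linker's soinfo fields):

    #include <elf.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Recover the load base from the runtime address of the program header
     * table: find the PT_PHDR entry and subtract its link-time p_vaddr.
     * Returns 0 if no PT_PHDR entry exists, as the patch does. */
    static Elf32_Addr find_load_base(const Elf32_Phdr* phdr, size_t phnum)
    {
        size_t i;
        for (i = 0; i < phnum; ++i) {
            if (phdr[i].p_type == PT_PHDR)
                return (Elf32_Addr)(uintptr_t)phdr - phdr[i].p_vaddr;
        }
        return 0;
    }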