51 files changed, 3244 insertions(+), 215 deletions(-)
diff --git a/libc/Android.mk b/libc/Android.mk index 6a77deb..c04a0fc 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -279,7 +279,6 @@ libc_common_src_files := \ bionic/libc_init_common.c \ bionic/logd_write.c \ bionic/md5.c \ - bionic/memmove_words.c \ bionic/pututline.c \ bionic/realpath.c \ bionic/sched_getaffinity.c \ @@ -384,14 +383,46 @@ libc_common_src_files += \ arch-arm/bionic/memset.S \ arch-arm/bionic/setjmp.S \ arch-arm/bionic/sigsetjmp.S \ - arch-arm/bionic/strlen.c.arm \ arch-arm/bionic/strcpy.S \ arch-arm/bionic/strcmp.S \ arch-arm/bionic/syscall.S \ - string/memmove.c.arm \ - string/bcopy.c \ string/strncmp.c \ unistd/socketcalls.c +ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) +libc_common_src_files += arch-arm/bionic/strlen-armv7.S +else +libc_common_src_files += arch-arm/bionic/strlen.c.arm +endif + +# Check if we want a neonized version of memmove instead of the +# current ARM version +ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true) +libc_common_src_files += \ + arch-arm/bionic/memmove.S \ + bionic/memmove_words.c +else +ifneq (, $(filter true,$(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION) $(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION))) + libc_common_src_files += \ + arch-arm/bionic/memmove.S + else # Other ARM + libc_common_src_files += \ + string/bcopy.c \ + string/memmove.c.arm \ + bionic/memmove_words.c + endif # !TARGET_USE_KRAIT_BIONIC_OPTIMIZATION +endif # !TARGET_USE_SCORPION_BIONIC_OPTIMIZATION + +# If the kernel supports kernel user helpers for gettimeofday, use +# that instead. +ifeq ($(KERNEL_HAS_GETTIMEOFDAY_HELPER),true) + libc_common_src_files := $(filter-out arch-arm/syscalls/gettimeofday.S,$(libc_common_src_files)) + libc_common_src_files := $(filter-out arch-arm/syscalls/clock_gettime.S,$(libc_common_src_files)) + libc_common_src_files += \ + arch-arm/bionic/gettimeofday.c \ + arch-arm/bionic/gettimeofday_syscall.S \ + arch-arm/bionic/clock_gettime.c \ + arch-arm/bionic/clock_gettime_syscall.S +endif # KERNEL_HAS_GETTIMEOFDAY_HELPER # These files need to be arm so that gdbserver # can set breakpoints in them without messing @@ -436,6 +467,7 @@ libc_common_src_files += \ arch-x86/string/strcmp_wrapper.S \ arch-x86/string/strncmp_wrapper.S \ arch-x86/string/strlen_wrapper.S \ + bionic/memmove_words.c \ string/strcpy.c \ bionic/pthread-atfork.c \ bionic/pthread-rwlocks.c \ @@ -476,6 +508,9 @@ libc_common_src_files += \ arch-mips/string/mips_strlen.c libc_common_src_files += \ + bionic/memmove_words.c + +libc_common_src_files += \ string/bcopy.c \ string/memcmp.c \ string/strcmp.c \ @@ -555,6 +590,44 @@ ifeq ($(TARGET_ARCH),arm) ifeq ($(ARCH_ARM_USE_NON_NEON_MEMCPY),true) libc_common_cflags += -DARCH_ARM_USE_NON_NEON_MEMCPY endif + + # Add in defines to activate SCORPION_NEON_OPTIMIZATION + ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true) + libc_common_cflags += -DSCORPION_NEON_OPTIMIZATION + ifeq ($(TARGET_USE_SCORPION_PLD_SET),true) + libc_common_cflags += -DPLDOFFS=$(TARGET_SCORPION_BIONIC_PLDOFFS) + libc_common_cflags += -DPLDSIZE=$(TARGET_SCORPION_BIONIC_PLDSIZE) + endif + endif + ifeq ($(TARGET_HAVE_TEGRA_ERRATA_657451),true) + libc_common_cflags += -DHAVE_TEGRA_ERRATA_657451 + endif + # Add in defines to activate KRAIT_NEON_OPTIMIZATION + ifeq ($(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION),true) + libc_common_cflags += -DKRAIT_NEON_OPTIMIZATION + ifeq ($(TARGET_USE_KRAIT_PLD_SET),true) + libc_common_cflags += -DPLDOFFS=$(TARGET_KRAIT_BIONIC_PLDOFFS) + libc_common_cflags += -DPLDTHRESH=$(TARGET_KRAIT_BIONIC_PLDTHRESH) + libc_common_cflags += 
-DPLDSIZE=$(TARGET_KRAIT_BIONIC_PLDSIZE) + libc_common_cflags += -DBBTHRESH=$(TARGET_KRAIT_BIONIC_BBTHRESH) + endif + endif + ifeq ($(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION),true) + libc_common_cflags += -DSPARROW_NEON_OPTIMIZATION + endif + ifeq ($(TARGET_CORTEX_CACHE_LINE_32),true) + libc_common_cflags += -DCORTEX_CACHE_LINE_32 + endif +else # !arm + ifeq ($(TARGET_ARCH),x86) + libc_crt_target_cflags := + ifeq ($(ARCH_X86_HAVE_SSE2),true) + libc_crt_target_cflags += -DUSE_SSE2=1 + endif + ifeq ($(ARCH_X86_HAVE_SSSE3),true) + libc_crt_target_cflags += -DUSE_SSSE3=1 + endif + endif # x86 endif # !arm ifeq ($(TARGET_ARCH),x86) diff --git a/libc/arch-arm/bionic/clock_gettime.c b/libc/arch-arm/bionic/clock_gettime.c new file mode 100644 index 0000000..c2917b0 --- /dev/null +++ b/libc/arch-arm/bionic/clock_gettime.c @@ -0,0 +1,94 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <string.h> +#include <stdint.h> +#include <time.h> +#include <sys/time.h> +#include <machine/cpu-features.h> +#include <machine/kernel_user_helper.h> + +int clock_gettime(int clk_id, struct timespec *tp) +{ + unsigned prelock, postlock; + + /* + * Check if the offset in the kernel user helper page has + * the flag set appropriately to show that this feature is + * enabled in the kernel. If not, default to the original + * clock_gettime system call. + * + * Also, if this is anything other than CLOCK_MONOTONIC, route + * to the original system call as well. 
+ */ + if ((__kuser_gtod_feature != __kuser_gtod_feature_flag) || + (clk_id != CLOCK_MONOTONIC)) + return clock_gettime_syscall(clk_id, tp); + + if (tp) { + struct gtod_t dgtod; + uint32_t nscount, cycleoffset; + uint32_t mono_sec, mono_nsec; + uint64_t cycle_delta; + + do { + prelock = __kuser_gtod_seqnum; + + dgtod.cycle_last = __kuser_gtod_cycle_last; + dgtod.mask = __kuser_gtod_mask; + dgtod.mult = __kuser_gtod_mult; + dgtod.shift = __kuser_gtod_shift; + dgtod.tv_sec = __kuser_gtod_tv_sec; + dgtod.tv_nsec = __kuser_gtod_tv_nsec; + + mono_sec = __kuser_gtod_wtm_tv_sec; + mono_nsec = __kuser_gtod_wtm_tv_nsec; + + cycleoffset = __kuser_gtod_offset; + cycleoffset += __kuser_gtod_cycle_base; + nscount = *(uint32_t *)cycleoffset; + + postlock = __kuser_gtod_seqnum; + } while (prelock != postlock); + + cycle_delta = (nscount - dgtod.cycle_last) & dgtod.mask; + dgtod.tv_nsec += (cycle_delta * dgtod.mult) >> dgtod.shift; + dgtod.tv_sec += mono_sec; + dgtod.tv_nsec += mono_nsec; + while (dgtod.tv_nsec >= NSEC_PER_SEC) { + dgtod.tv_sec += 1; + dgtod.tv_nsec -= NSEC_PER_SEC; + } + + tp->tv_sec = dgtod.tv_sec; + tp->tv_nsec = dgtod.tv_nsec; + } + + return 0; +} diff --git a/libc/arch-arm/bionic/clock_gettime_syscall.S b/libc/arch-arm/bionic/clock_gettime_syscall.S new file mode 100644 index 0000000..0b3078a --- /dev/null +++ b/libc/arch-arm/bionic/clock_gettime_syscall.S @@ -0,0 +1,42 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <machine/asm.h> +#include <sys/linux-syscalls.h> + +ENTRY(clock_gettime_syscall) + .save {r4, r7} + stmfd sp!, {r4, r7} + ldr r7, =__NR_clock_gettime + swi #0 + movs r0, r0 + ldmfd sp!, {r4, r7} + bmi __set_syscall_errno + bx lr +END(clock_gettime_syscall) diff --git a/libc/arch-arm/bionic/gettimeofday.c b/libc/arch-arm/bionic/gettimeofday.c new file mode 100644 index 0000000..780d6e8 --- /dev/null +++ b/libc/arch-arm/bionic/gettimeofday.c @@ -0,0 +1,99 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. 
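The retry loop in clock_gettime() above is a user-side seqlock read: snapshot the kernel's sequence number, copy the time data out of the helper page, and retry if the sequence changed mid-copy. A minimal sketch of the pattern in C, with an illustrative struct layout rather than the real fixed-address kuser page:

    #include <stdint.h>

    /* Illustrative layout only -- the real data lives at fixed kuser helper
     * addresses (see kernel_user_helper.h later in this diff). */
    struct time_page {
        volatile uint32_t seq;     /* bumped by the kernel on every update */
        volatile uint32_t sec;
        volatile uint32_t nsec;
    };

    static void read_time(const struct time_page *tp, uint32_t *sec, uint32_t *nsec)
    {
        uint32_t pre, post;
        do {
            pre   = tp->seq;   /* sequence number before the copy */
            *sec  = tp->sec;   /* copy out the protected fields */
            *nsec = tp->nsec;
            post  = tp->seq;   /* sequence number after the copy */
        } while (pre != post); /* a change means the kernel updated mid-read */
    }

The bionic code above does exactly this with __kuser_gtod_seqnum as the sequence word; matching before/after values guarantee a consistent snapshot without taking any lock.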
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <string.h> +#include <stdint.h> +#include <sys/time.h> +#include <machine/cpu-features.h> +#include <machine/kernel_user_helper.h> + +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + unsigned prelock, postlock; + + /* + * Check if the offset in the kernel user helper page has + * the flag set appropriately to show that this feature is + * enabled in the kernel. If not, default to the original + * gettimeofday system call. 
+ */ + if (__kuser_gtod_feature != __kuser_gtod_feature_flag) + return gettimeofday_syscall(tv, tz); + + if (tv) { + struct gtod_t dgtod; + uint32_t nscount, cycleoffset; + uint64_t cycle_delta; + uint32_t tmp = 0; + + do { + prelock = __kuser_gtod_seqnum; + + dgtod.cycle_last = __kuser_gtod_cycle_last; + dgtod.mask = __kuser_gtod_mask; + dgtod.mult = __kuser_gtod_mult; + dgtod.shift = __kuser_gtod_shift; + dgtod.tv_sec = __kuser_gtod_tv_sec; + dgtod.tv_nsec = __kuser_gtod_tv_nsec; + + cycleoffset = __kuser_gtod_offset; + cycleoffset += __kuser_gtod_cycle_base; + nscount = *(uint32_t *)cycleoffset; + + postlock = __kuser_gtod_seqnum; + } while (prelock != postlock); + + cycle_delta = (nscount - dgtod.cycle_last) & dgtod.mask; + dgtod.tv_nsec += (cycle_delta * dgtod.mult) >> dgtod.shift; + while (dgtod.tv_nsec >= NSEC_PER_SEC) { + dgtod.tv_sec += 1; + dgtod.tv_nsec -= NSEC_PER_SEC; + } + + tv->tv_sec = dgtod.tv_sec; + asm(" movw %[tmp], #0x4dd3\n\t" + " movt %[tmp], #0x1062\n\t" + " umull %[tmp], %[x], %[y], %[tmp]\n\t" + " lsr %[x], %[x], #6\n\t" : + [x] "=r" (tv->tv_usec) : + [y] "r" (dgtod.tv_nsec), [tmp] "r" (tmp) + : ); + } + + if (tz) { + do { + prelock = __kuser_gtod_seqnum; + tz->tz_minuteswest = __kuser_gtod_tz_minw; + tz->tz_dsttime = __kuser_gtod_tz_dst; + postlock = __kuser_gtod_seqnum; + } while (prelock != postlock); + } + + return 0; +} diff --git a/libc/arch-arm/bionic/gettimeofday_syscall.S b/libc/arch-arm/bionic/gettimeofday_syscall.S new file mode 100644 index 0000000..3a945e2 --- /dev/null +++ b/libc/arch-arm/bionic/gettimeofday_syscall.S @@ -0,0 +1,42 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
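The inline asm at the end of gettimeofday() converts nanoseconds to microseconds without a divide: movw/movt materialize the magic constant 0x10624DD3 (which is ceil(2^38 / 1000)), umull keeps the high 32 bits of the product, and the final lsr #6 completes a 38-bit shift, so tv_usec = (nsec * 0x10624DD3) >> 38 = nsec / 1000. The same trick in plain C (a sketch; optimizing compilers emit this form for nsec / 1000 on their own):

    #include <stdint.h>

    /* Divide by 1000 via reciprocal multiplication: 0x10624DD3 == ceil(2^38/1000),
     * and (n * 0x10624DD3) >> 38 == n / 1000 exactly for every 32-bit n. */
    static uint32_t nsec_to_usec(uint32_t nsec)
    {
        return (uint32_t)(((uint64_t)nsec * 0x10624DD3u) >> 38);
    }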
+ * + */ + +#include <machine/asm.h> +#include <sys/linux-syscalls.h> + +ENTRY(gettimeofday_syscall) + .save {r4, r7} + stmfd sp!, {r4, r7} + ldr r7, =__NR_gettimeofday + swi #0 + movs r0, r0 + ldmfd sp!, {r4, r7} + bmi __set_syscall_errno + bx lr +END(gettimeofday_syscall) diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S index c872a51..781c4f8 100644 --- a/libc/arch-arm/bionic/memcmp.S +++ b/libc/arch-arm/bionic/memcmp.S @@ -1,5 +1,6 @@ /* - * Copyright (C) 2008 The Android Open Source Project + * Copyright (C) 2008, 2011 The Android Open Source Project + * Copyright (C) 2010 ST-Ericsson SA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,43 +31,71 @@ #include <machine/asm.h> /* - * Optimized memcmp() for ARM9. - * This would not be optimal on XScale or ARM11, where more prefetching - * and use of PLD will be needed. - * The 2 major optimzations here are - * (1) The main loop compares 16 bytes at a time - * (2) The loads are scheduled in a way they won't stall + * Optimized memcmp() for ARM9 and Cortex-A9 */ +#if __ARM_ARCH__ >= 7 +#define __ARM_CORTEX + +#if defined(CORTEX_CACHE_LINE_32) +#define CACHE_LINE_SIZE 32 +#else +#define CACHE_LINE_SIZE 64 +#endif + +#endif /* __ARM_ARCH__ */ + ENTRY(memcmp) +#if defined(__ARM_CORTEX) + pld [r0, #(CACHE_LINE_SIZE * 0)] + pld [r0, #(CACHE_LINE_SIZE * 1)] +#else PLD (r0, #0) PLD (r1, #0) +#endif /* take of the case where length is 0 or the buffers are the same */ cmp r0, r1 +#if !defined(__ARM_CORTEX) cmpne r2, #0 +#endif moveq r0, #0 bxeq lr +#if defined(__ARM_CORTEX) + pld [r1, #(CACHE_LINE_SIZE * 0)] + pld [r1, #(CACHE_LINE_SIZE * 1)] + + /* make sure we have at least 8+4 bytes, this simplify things below + * and avoid some overhead for small blocks + */ + cmp r2, #(8+4) + bmi 10f +#endif /* __ARM_CORTEX */ + .save {r4, lr} /* save registers */ stmfd sp!, {r4, lr} - + +#if !defined(__ARM_CORTEX) PLD (r0, #32) PLD (r1, #32) +#endif /* since r0 hold the result, move the first source * pointer somewhere else */ mov r4, r0 - + +#if !defined(__ARM_CORTEX) /* make sure we have at least 8+4 bytes, this simplify things below * and avoid some overhead for small blocks */ cmp r2, #(8+4) bmi 8f - +#endif + /* align first pointer to word boundary * offset = -src & 3 */ @@ -103,8 +132,14 @@ ENTRY(memcmp) subs r2, r2, #(32 + 4) bmi 1f -0: PLD (r4, #64) +0: +#if defined(__ARM_CORTEX) + pld [r4, #(CACHE_LINE_SIZE * 2)] + pld [r1, #(CACHE_LINE_SIZE * 2)] +#else + PLD (r4, #64) PLD (r1, #64) +#endif ldr r0, [r4], #4 ldr lr, [r1, #4]! eors r0, r0, ip @@ -170,6 +205,22 @@ ENTRY(memcmp) 9: /* restore registers and return */ ldmfd sp!, {r4, lr} bx lr + +#if defined(__ARM_CORTEX) +10: /* process less than 12 bytes */ + cmp r2, #0 + moveq r0, #0 + bxeq lr + mov r3, r0 +11: + ldrb r0, [r3], #1 + ldrb ip, [r1], #1 + subs r0, ip + bxne lr + subs r2, r2, #1 + bne 11b + bx lr +#endif /* __ARM_CORTEX */ END(memcmp) @@ -192,8 +243,14 @@ END(memcmp) bic r1, r1, #3 ldr lr, [r1], #4 -6: PLD (r1, #64) +6: +#if defined(__ARM_CORTEX) + pld [r1, #(CACHE_LINE_SIZE * 2)] + pld [r4, #(CACHE_LINE_SIZE * 2)] +#else + PLD (r1, #64) PLD (r4, #64) +#endif mov ip, lr, lsr #16 ldr lr, [r1], #4 ldr r0, [r4], #4 diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S index 8453cc0..80f1bf5 100644 --- a/libc/arch-arm/bionic/memcpy.S +++ b/libc/arch-arm/bionic/memcpy.S @@ -2,6 +2,8 @@ * Copyright (C) 2008 The Android Open Source Project * All rights reserved. 
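The gettimeofday_syscall/clock_gettime_syscall stubs earlier in this diff share one error convention: the kernel returns -errno in r0, movs sets the condition flags, and a negative result branches to __set_syscall_errno. A C-level sketch of that tail (illustrative; the real stubs do this in two instructions):

    #include <errno.h>

    /* What the 'movs r0, r0 / bmi __set_syscall_errno' tail amounts to:
     * fold a negative kernel return into errno and return -1. */
    static long finish_syscall(long raw)
    {
        if (raw < 0) {
            errno = (int)-raw;   /* e.g. -EINVAL becomes errno == EINVAL */
            return -1;
        }
        return raw;
    }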
* + * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -30,6 +32,396 @@ #include <machine/asm.h> #if defined(__ARM_NEON__) && !defined(ARCH_ARM_USE_NON_NEON_MEMCPY) +#if defined(KRAIT_NEON_OPTIMIZATION) + /* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true + * TARGET_USE_KRAIT_PLD_SET := true + * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset> + * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize> + * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold> + * TARGET_KRAIT_BIONIC_BBTHRESH := <bbthreshold> + */ +#ifndef PLDOFFS +#define PLDOFFS (10) +#endif +#ifndef PLDTHRESH +#define PLDTHRESH (PLDOFFS) +#endif +#ifndef BBTHRESH +#define BBTHRESH (4096/64) +#endif +#if (PLDOFFS < 1) +#error Routine does not support offsets less than 1 +#endif +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif +#ifndef PLDSIZE +#define PLDSIZE (64) +#endif +#define NOP_OPCODE (0xe320f000) + + .text + .fpu neon + .global memcpy + .type memcpy, %function + .align 5 +memcpy: + stmfd sp!, {r0, r9, r10, lr} + cmp r2, #4 + blt .Lneon_lt4 + cmp r2, #16 + blt .Lneon_lt16 + cmp r2, #32 + blt .Lneon_16 + cmp r2, #64 + blt .Lneon_copy_32_a + + mov r12, r2, lsr #6 + cmp r12, #PLDTHRESH + ble .Lneon_copy_64_loop_nopld + + cmp r12, #BBTHRESH + ble .Lneon_prime_pump + + add lr, r0, #0x400 + add r9, r1, #(PLDOFFS*PLDSIZE) + sub lr, lr, r9 + lsl lr, lr, #21 + lsr lr, lr, #21 + add lr, lr, #(PLDOFFS*PLDSIZE) + cmp r12, lr, lsr #6 + movle lr, #(PLDOFFS*PLDSIZE) + + movgt r9, #(PLDOFFS) + rsbgts r9, r9, lr, lsr #6 + ble .Lneon_prime_pump + + add r10, r1, lr + bic r10, #0x3F + + sub r12, lr, lsr #6 + cmp r9, r12 + suble r12, r12, r9 + movgt r9, r12 + movgt r12, #0 + + pld [r1, #((PLDOFFS-1)*PLDSIZE)] + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_copy_64_loop_outer_doublepld: + pld [r1, #((PLDOFFS)*PLDSIZE)] + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r9, r9, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .Lneon_copy_64_loop_outer_doublepld + cmp r12, #0 + bne .Lneon_copy_64_loop_outer + mov r12, lr, lsr #6 + b .Lneon_copy_64_loop_nopld + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_prime_pump: + mov lr, #(PLDOFFS*PLDSIZE) + add r10, r1, #(PLDOFFS*PLDSIZE) + bic r10, #0x3F + sub r12, r12, #PLDOFFS + pld [r10, #(-1*PLDSIZE)] + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_copy_64_loop_outer: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .Lneon_copy_64_loop_outer + mov r12, lr, lsr #6 + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_copy_64_loop_nopld: + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + bne .Lneon_copy_64_loop_nopld + ands r2, r2, #0x3f + beq .Lneon_exit + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_copy_32_a: + movs r12, r2, lsl #27 + bcc .Lneon_16 + vld1.32 {q0,q1}, [r1]! + vst1.32 {q0,q1}, [r0]! + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_16: + bpl .Lneon_lt16 + vld1.32 {q8}, [r1]! + vst1.32 {q8}, [r0]! 
+ ands r2, r2, #0x0f + beq .Lneon_exit + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_lt16: + movs r12, r2, lsl #29 + ldrcs r3, [r1], #4 + ldrcs r12, [r1], #4 + strcs r3, [r0], #4 + strcs r12, [r0], #4 + ldrmi r3, [r1], #4 + strmi r3, [r0], #4 + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_lt4: + movs r2, r2, lsl #31 + ldrcsh r3, [r1], #2 + strcsh r3, [r0], #2 + ldrmib r12, [r1] + strmib r12, [r0] + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_exit: + ldmfd sp!, {r0, r9, r10, lr} + bx lr + .end +#elif defined(SCORPION_NEON_OPTIMIZATION) + /* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_SCORPION_BIONIC_OPTIMIZATION := true + * TARGET_USE_SCORPION_PLD_SET := true + * TARGET_SCORPION_BIONIC_PLDOFFS := <pldoffset> + * TARGET_SCORPION_BIONIC_PLDSIZE := <pldsize> + */ +#ifndef PLDOFFS +#define PLDOFFS (6) +#endif +#ifndef PLDSIZE +#define PLDSIZE (128) /* L2 cache line size */ +#endif + .code 32 + .align 5 + .globl memcpy + .func +memcpy: + push {r0} + cmp r2, #4 + blt .Lneon_lt4 + cmp r2, #16 + blt .Lneon_lt16 + cmp r2, #32 + blt .Lneon_16 + cmp r2, #128 + blt .Lneon_copy_32_a + /* Copy blocks of 128-bytes (word-aligned) at a time*/ + /* Code below is optimized for PLDSIZE=128 only */ + mov r12, r2, lsr #7 + cmp r12, #PLDOFFS + ble .Lneon_copy_128_loop_nopld + sub r12, #PLDOFFS + pld [r1, #(PLDOFFS-1)*PLDSIZE] +.Lneon_copy_128_loop_outer: + pld [r1, #(PLDOFFS*PLDSIZE)] + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + bne .Lneon_copy_128_loop_outer + mov r12, #PLDOFFS +.Lneon_copy_128_loop_nopld: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + bne .Lneon_copy_128_loop_nopld + ands r2, r2, #0x7f + beq .Lneon_exit + cmp r2, #32 + blt .Lneon_16 + nop + /* Copy blocks of 32-bytes (word aligned) at a time*/ +.Lneon_copy_32_a: + mov r12, r2, lsr #5 +.Lneon_copy_32_loop_a: + vld1.32 {q0,q1}, [r1]! + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0]! + bne .Lneon_copy_32_loop_a + ands r2, r2, #0x1f + beq .Lneon_exit +.Lneon_16: + subs r2, r2, #16 + blt .Lneon_lt16 + vld1.32 {q8}, [r1]! + vst1.32 {q8}, [r0]! 
+ beq .Lneon_exit +.Lneon_lt16: + movs r12, r2, lsl #29 + bcc .Lneon_skip8 + ldr r3, [r1], #4 + ldr r12, [r1], #4 + str r3, [r0], #4 + str r12, [r0], #4 +.Lneon_skip8: + bpl .Lneon_lt4 + ldr r3, [r1], #4 + str r3, [r0], #4 +.Lneon_lt4: + movs r2, r2, lsl #31 + bcc .Lneon_lt2 + ldrh r3, [r1], #2 + strh r3, [r0], #2 +.Lneon_lt2: + bpl .Lneon_exit + ldrb r12, [r1] + strb r12, [r0] +.Lneon_exit: + pop {r0} + bx lr + .endfunc + .end +#else +#if defined(CORTEX_CACHE_LINE_32) + /* + *This can be enabled by setting flag + *TARGET_CORTEX_CACHE_LINE_32 in + *device/<vendor>/<board>/BoardConfig.mk + */ + .text + .fpu neon + + .global memcpy + .type memcpy, %function + .align 4 + +/* a prefetch distance of 4 cache-lines works best experimentally */ +#define CACHE_LINE_SIZE 32 +memcpy: + .fnstart + .save {r0, lr} + stmfd sp!, {r0, lr} + + /* start preloading as early as possible */ + pld [r1, #(CACHE_LINE_SIZE*0)] + pld [r1, #(CACHE_LINE_SIZE*1)] + + /* do we have at least 16-bytes to copy (needed for alignment below) */ + cmp r2, #16 + blo 5f + + /* align destination to half cache-line for the write-buffer */ + rsb r3, r0, #0 + ands r3, r3, #0xF + beq 0f + + /* copy up to 15-bytes (count in r3) */ + sub r2, r2, r3 + movs ip, r3, lsl #31 + ldrmib lr, [r1], #1 + strmib lr, [r0], #1 + ldrcsb ip, [r1], #1 + ldrcsb lr, [r1], #1 + strcsb ip, [r0], #1 + strcsb lr, [r0], #1 + movs ip, r3, lsl #29 + bge 1f + // copies 4 bytes, destination 32-bits aligned + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! +1: bcc 2f + // copies 8 bytes, destination 64-bits aligned + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0, :64]! +2: + +0: /* preload immediately the next cache line, which we may need */ + pld [r1, #(CACHE_LINE_SIZE*0)] + pld [r1, #(CACHE_LINE_SIZE*1)] + + /* make sure we have at least 128 bytes to copy */ + subs r2, r2, #128 + blo 2f + + /* preload all the cache lines we need. + * NOTE: the number of pld below depends on PREFETCH_DISTANCE, + * ideally would would increase the distance in the main loop to + * avoid the goofy code below. In practice this doesn't seem to make + * a big difference. + */ + pld [r1, #(CACHE_LINE_SIZE*2)] + pld [r1, #(CACHE_LINE_SIZE*3)] + pld [r1, #(CACHE_LINE_SIZE*4)] + + .align 3 +1: /* The main loop copies 128 bytes at a time */ + subs r2, r2, #128 + vld1.8 {d0 - d3}, [r1]! + vld1.8 {d4 - d7}, [r1]! + pld [r1, #(CACHE_LINE_SIZE*1)] + pld [r1, #(CACHE_LINE_SIZE*2)] + vld1.8 {d16 - d19}, [r1]! + vld1.8 {d20 - d23}, [r1]! + pld [r1, #(CACHE_LINE_SIZE*1)] + pld [r1, #(CACHE_LINE_SIZE*2)] + vst1.8 {d0 - d3}, [r0, :128]! + vst1.8 {d4 - d7}, [r0, :128]! + vst1.8 {d16 - d19}, [r0, :128]! + vst1.8 {d20 - d23}, [r0, :128]! + bhs 1b + +2: /* fix-up the remaining count and make sure we have >= 32 bytes left */ + add r2, r2, #128 + subs r2, r2, #32 + blo 4f + +3: /* 32 bytes at a time. These cache lines were already preloaded */ + vld1.8 {d0 - d3}, [r1]! + subs r2, r2, #32 + vst1.8 {d0 - d3}, [r0, :128]! + bhs 3b + +4: /* less than 32 left */ + add r2, r2, #32 + tst r2, #0x10 + beq 5f + // copies 16 bytes, 128-bits aligned + vld1.8 {d0, d1}, [r1]! + vst1.8 {d0, d1}, [r0, :128]! + +5: /* copy up to 15-bytes (count in r2) */ + movs ip, r2, lsl #29 + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: bge 2f + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! 
+2: movs ip, r2, lsl #31 + ldrmib r3, [r1], #1 + ldrcsb ip, [r1], #1 + ldrcsb lr, [r1], #1 + strmib r3, [r0], #1 + strcsb ip, [r0], #1 + strcsb lr, [r0], #1 + + ldmfd sp!, {r0, lr} + bx lr + .fnend +#else /*!CORTEX_CACHE_LINE_32*/ .text .fpu neon @@ -165,8 +557,8 @@ ENTRY(memcpy) ldmfd sp!, {r0, lr} bx lr END(memcpy) - - +#endif /* CORTEX_CACHE_LINE_32 */ +#endif /* !SCORPION_NEON_OPTIMIZATION */ #else /* __ARM_ARCH__ < 7 */ diff --git a/libc/arch-arm/bionic/memmove.S b/libc/arch-arm/bionic/memmove.S new file mode 100644 index 0000000..937d14b --- /dev/null +++ b/libc/arch-arm/bionic/memmove.S @@ -0,0 +1,526 @@ +/*************************************************************************** + Copyright (c) 2009-2012 Code Aurora Forum. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Code Aurora nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
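All of the NEON memcpy variants above share one structure: prime the prefetcher a fixed number of cache lines ahead (PLDOFFS lines of PLDSIZE bytes), run a main loop that keeps prefetching while it copies, then drain the final lines without prefetch because those were already requested. A portable sketch of that pipeline (the tunables here are illustrative, mirroring the BoardConfig.mk knobs):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define PLDOFFS 10   /* prefetch distance, in cache lines (illustrative) */
    #define PLDSIZE 64   /* bytes per prefetched line (illustrative) */

    static void copy_lines(uint8_t *dst, const uint8_t *src, size_t nlines)
    {
        size_t i = 0;
        /* Main loop: copy line i while prefetching line i + PLDOFFS. */
        for (; i + PLDOFFS < nlines; i++) {
            __builtin_prefetch(src + (i + PLDOFFS) * PLDSIZE);
            memcpy(dst + i * PLDSIZE, src + i * PLDSIZE, PLDSIZE);
        }
        /* Drain: the last PLDOFFS lines were prefetched by earlier iterations. */
        for (; i < nlines; i++)
            memcpy(dst + i * PLDSIZE, src + i * PLDSIZE, PLDSIZE);
    }

The Krait variant additionally walks a second pointer (r10) through the source above the BBTHRESH block size, issuing plain loads to pull lines in; the sketch shows only the basic distance-based pipelining.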
+ ***************************************************************************/ + +/*************************************************************************** + * Neon memmove: Attempts to do a memmove with Neon registers if possible, + * Inputs: + * dest: The destination buffer + * src: The source buffer + * n: The size of the buffer to transfer + * Outputs: + * + ***************************************************************************/ + +#include <machine/cpu-features.h> + +#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION) + /* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true + * TARGET_USE_KRAIT_PLD_SET := true + * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset> + * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize> + * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold> + */ +#ifndef PLDOFFS +#define PLDOFFS (10) +#endif +#ifndef PLDTHRESH +#define PLDTHRESH (PLDOFFS) +#endif +#if (PLDOFFS < 5) +#error Routine does not support offsets less than 5 +#endif +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif +#ifndef PLDSIZE +#define PLDSIZE (64) +#endif +#define NOP_OPCODE (0xe320f000) + + .code 32 + .align 5 + .global memmove + .type memmove, %function + + .global _memmove_words + .type _memmove_words, %function + + .global bcopy + .type bcopy, %function + +bcopy: + mov r12, r0 + mov r0, r1 + mov r1, r12 + .balignl 64, NOP_OPCODE, 4*2 +memmove: +_memmove_words: +.Lneon_memmove_cmf: + subs r12, r0, r1 + bxeq lr + cmphi r2, r12 + bls memcpy /* Use memcpy for non-overlapping areas */ + + push {r0} + +.Lneon_back_to_front_copy: + add r0, r0, r2 + add r1, r1, r2 + cmp r2, #4 + bgt .Lneon_b2f_gt4 + cmp r2, #0 +.Lneon_b2f_smallcopy_loop: + beq .Lneon_memmove_done + ldrb r12, [r1, #-1]! + subs r2, r2, #1 + strb r12, [r0, #-1]! + b .Lneon_b2f_smallcopy_loop +.Lneon_b2f_gt4: + sub r3, r0, r1 + cmp r2, r3 + movle r12, r2 + movgt r12, r3 + cmp r12, #64 + bge .Lneon_b2f_copy_64 + cmp r12, #32 + bge .Lneon_b2f_copy_32 + cmp r12, #8 + bge .Lneon_b2f_copy_8 + cmp r12, #4 + bge .Lneon_b2f_copy_4 + b .Lneon_b2f_copy_1 +.Lneon_b2f_copy_64: + sub r1, r1, #64 /* Predecrement */ + sub r0, r0, #64 + movs r12, r2, lsr #6 + cmp r12, #PLDTHRESH + ble .Lneon_b2f_copy_64_loop_nopld + sub r12, #PLDOFFS + pld [r1, #-(PLDOFFS-5)*PLDSIZE] + pld [r1, #-(PLDOFFS-4)*PLDSIZE] + pld [r1, #-(PLDOFFS-3)*PLDSIZE] + pld [r1, #-(PLDOFFS-2)*PLDSIZE] + pld [r1, #-(PLDOFFS-1)*PLDSIZE] + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_b2f_copy_64_loop_outer: + pld [r1, #-(PLDOFFS)*PLDSIZE] + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + sub r1, r1, #96 /* Post-fixup and predecrement */ + vst1.32 {q2, q3}, [r0] + sub r0, r0, #96 + bne .Lneon_b2f_copy_64_loop_outer + mov r12, #PLDOFFS + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_b2f_copy_64_loop_nopld: + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1] + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! 
+ sub r1, r1, #96 /* Post-fixup and predecrement */ + vst1.32 {q10, q11}, [r0] + sub r0, r0, #96 + bne .Lneon_b2f_copy_64_loop_nopld + ands r2, r2, #0x3f + beq .Lneon_memmove_done + add r1, r1, #64 /* Post-fixup */ + add r0, r0, #64 + cmp r2, #32 + blt .Lneon_b2f_copy_finish +.Lneon_b2f_copy_32: + mov r12, r2, lsr #5 +.Lneon_b2f_copy_32_loop: + sub r1, r1, #32 /* Predecrement */ + sub r0, r0, #32 + vld1.32 {q0,q1}, [r1] + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0] + bne .Lneon_b2f_copy_32_loop + ands r2, r2, #0x1f + beq .Lneon_memmove_done +.Lneon_b2f_copy_finish: +.Lneon_b2f_copy_8: + movs r12, r2, lsr #0x3 + beq .Lneon_b2f_copy_4 + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_b2f_copy_8_loop: + sub r1, r1, #8 /* Predecrement */ + sub r0, r0, #8 + vld1.32 {d0}, [r1] + subs r12, r12, #1 + vst1.32 {d0}, [r0] + bne .Lneon_b2f_copy_8_loop + ands r2, r2, #0x7 + beq .Lneon_memmove_done +.Lneon_b2f_copy_4: + movs r12, r2, lsr #0x2 + beq .Lneon_b2f_copy_1 +.Lneon_b2f_copy_4_loop: + ldr r3, [r1, #-4]! + subs r12, r12, #1 + str r3, [r0, #-4]! + bne .Lneon_b2f_copy_4_loop + ands r2, r2, #0x3 +.Lneon_b2f_copy_1: + cmp r2, #0 + beq .Lneon_memmove_done + .balignl 64, NOP_OPCODE, 4*2 +.Lneon_b2f_copy_1_loop: + ldrb r12, [r1, #-1]! + subs r2, r2, #1 + strb r12, [r0, #-1]! + bne .Lneon_b2f_copy_1_loop + +.Lneon_memmove_done: + pop {r0} + bx lr + + .end + +#elif defined(SCORPION_NEON_OPTIMIZATION) + /* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_SCORPION_BIONIC_OPTIMIZATION := true + * TARGET_USE_SCORPION_PLD_SET := true + * TARGET_SCORPION_BIONIC_PLDOFFS := <pldoffset> + * TARGET_SCORPION_BIONIC_PLDSIZE := <pldsize> + */ +#ifndef PLDOFFS +#define PLDOFFS (6) +#endif +#ifndef PLDSIZE +#define PLDSIZE (128) /* L2 cache line size */ +#endif + + .code 32 + .align 5 + .global memmove + .type memmove, %function + + .global bcopy + .type bcopy, %function + +bcopy: + mov r12, r0 + mov r0, r1 + mov r1, r12 +memmove: + push {r0} + + /* + * The requirements for memmove state that the function should + * operate as if data were being copied from the source to a + * buffer, then to the destination. This is to allow a user + * to copy data from a source and target that overlap. + * + * We can't just do byte copies front-to-back automatically, since + * there's a good chance we may have an overlap (why else would someone + * intentionally use memmove then?). + * + * We'll break this into two parts. Front-to-back, or back-to-front + * copies. + */ +.Lneon_memmove_cmf: + cmp r0, r1 + blt .Lneon_front_to_back_copy + bgt .Lneon_back_to_front_copy + b .Lneon_memmove_done + + /* ############################################################# + * Front to Back copy + */ +.Lneon_front_to_back_copy: + /* + * For small copies, just do a quick memcpy. We can do this for + * front-to-back copies, aligned or unaligned, since we're only + * doing 1 byte at a time... + */ + cmp r2, #4 + bgt .Lneon_f2b_gt4 + cmp r2, #0 +.Lneon_f2b_smallcopy_loop: + beq .Lneon_memmove_done + ldrb r12, [r1], #1 + subs r2, r2, #1 + strb r12, [r0], #1 + b .Lneon_f2b_smallcopy_loop +.Lneon_f2b_gt4: + /* The window size is in r3. */ + sub r3, r1, r0 + /* ############################################################# + * Front to Back copy + */ + /* + * Note that we can't just route based on the size in r2. If that's + * larger than the overlap window in r3, we could potentially + * (and likely!) destroy data we're copying. 
+ */ + cmp r2, r3 + movle r12, r2 + movgt r12, r3 + cmp r12, #256 + bge .Lneon_f2b_copy_128 + cmp r12, #64 + bge .Lneon_f2b_copy_32 + cmp r12, #16 + bge .Lneon_f2b_copy_16 + cmp r12, #8 + bge .Lneon_f2b_copy_8 + cmp r12, #4 + bge .Lneon_f2b_copy_4 + b .Lneon_f2b_copy_1 + nop +.Lneon_f2b_copy_128: + mov r12, r2, lsr #7 + cmp r12, #PLDOFFS + ble .Lneon_f2b_copy_128_loop_nopld + sub r12, #PLDOFFS + pld [r1, #(PLDOFFS-1)*PLDSIZE] +.Lneon_f2b_copy_128_loop_outer: + pld [r1, #(PLDOFFS*PLDSIZE)] + vld1.32 {q0,q1}, [r1]! + vld1.32 {q2,q3}, [r1]! + vld1.32 {q8,q9}, [r1]! + vld1.32 {q10,q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0]! + vst1.32 {q2,q3}, [r0]! + vst1.32 {q8,q9}, [r0]! + vst1.32 {q10,q11}, [r0]! + bne .Lneon_f2b_copy_128_loop_outer + mov r12, #PLDOFFS +.Lneon_f2b_copy_128_loop_nopld: + vld1.32 {q0,q1}, [r1]! + vld1.32 {q2,q3}, [r1]! + vld1.32 {q8,q9}, [r1]! + vld1.32 {q10,q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0]! + vst1.32 {q2,q3}, [r0]! + vst1.32 {q8,q9}, [r0]! + vst1.32 {q10,q11}, [r0]! + bne .Lneon_f2b_copy_128_loop_nopld + ands r2, r2, #0x7f + beq .Lneon_memmove_done + cmp r2, #32 + bge .Lneon_f2b_copy_32 + b .Lneon_f2b_copy_finish +.Lneon_f2b_copy_32: + mov r12, r2, lsr #5 +.Lneon_f2b_copy_32_loop: + vld1.32 {q0,q1}, [r1]! + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0]! + bne .Lneon_f2b_copy_32_loop + ands r2, r2, #0x1f + beq .Lneon_memmove_done +.Lneon_f2b_copy_finish: +.Lneon_f2b_copy_16: + movs r12, r2, lsr #4 + beq .Lneon_f2b_copy_8 +.Lneon_f2b_copy_16_loop: + vld1.32 {q0}, [r1]! + subs r12, r12, #1 + vst1.32 {q0}, [r0]! + bne .Lneon_f2b_copy_16_loop + ands r2, r2, #0xf + beq .Lneon_memmove_done +.Lneon_f2b_copy_8: + movs r12, r2, lsr #3 + beq .Lneon_f2b_copy_4 +.Lneon_f2b_copy_8_loop: + vld1.32 {d0}, [r1]! + subs r12, r12, #1 + vst1.32 {d0}, [r0]! + bne .Lneon_f2b_copy_8_loop + ands r2, r2, #0x7 + beq .Lneon_memmove_done +.Lneon_f2b_copy_4: + movs r12, r2, lsr #2 + beq .Lneon_f2b_copy_1 +.Lneon_f2b_copy_4_loop: + ldr r3, [r1], #4 + subs r12, r12, #1 + str r3, [r0], #4 + bne .Lneon_f2b_copy_4_loop + ands r2, r2, #0x3 + nop +.Lneon_f2b_copy_1: + cmp r2, #0 + beq .Lneon_memmove_done +.Lneon_f2b_copy_1_loop: + ldrb r12, [r1], #1 + subs r2, r2, #1 + strb r12, [r0], #1 + bne .Lneon_f2b_copy_1_loop +.Lneon_f2b_finish: + b .Lneon_memmove_done + + /* ############################################################# + * Back to Front copy + */ +.Lneon_back_to_front_copy: + /* + * Here, we'll want to shift to the end of the buffers. This + * actually points us one past where we need to go, but since + * we'll pre-decrement throughout, this will be fine. + */ + add r0, r0, r2 + add r1, r1, r2 + cmp r2, #4 + bgt .Lneon_b2f_gt4 + cmp r2, #0 +.Lneon_b2f_smallcopy_loop: + beq .Lneon_memmove_done + ldrb r12, [r1, #-1]! + subs r2, r2, #1 + strb r12, [r0, #-1]! + b .Lneon_b2f_smallcopy_loop +.Lneon_b2f_gt4: + /* + * The minimum of the overlap window size and the copy size + * is in r3. 
+ */ + sub r3, r0, r1 + /* + * ############################################################# + * Back to Front copy - + */ + cmp r2, r3 + movle r12, r2 + movgt r12, r3 + cmp r12, #256 + bge .Lneon_b2f_copy_128 + cmp r12, #64 + bge .Lneon_b2f_copy_32 + cmp r12, #8 + bge .Lneon_b2f_copy_8 + cmp r12, #4 + bge .Lneon_b2f_copy_4 + b .Lneon_b2f_copy_1 + nop +.Lneon_b2f_copy_128: + movs r12, r2, lsr #7 + cmp r12, #PLDOFFS + ble .Lneon_b2f_copy_128_loop_nopld + sub r12, #PLDOFFS + pld [r1, #-(PLDOFFS-1)*PLDSIZE] +.Lneon_b2f_copy_128_loop_outer: + pld [r1, #-(PLDOFFS*PLDSIZE)] + sub r1, r1, #128 + sub r0, r0, #128 + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + sub r1, r1, #128 + sub r0, r0, #128 + bne .Lneon_b2f_copy_128_loop_outer + mov r12, #PLDOFFS +.Lneon_b2f_copy_128_loop_nopld: + sub r1, r1, #128 + sub r0, r0, #128 + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + sub r1, r1, #128 + sub r0, r0, #128 + bne .Lneon_b2f_copy_128_loop_nopld + ands r2, r2, #0x7f + beq .Lneon_memmove_done + cmp r2, #32 + bge .Lneon_b2f_copy_32 + b .Lneon_b2f_copy_finish +.Lneon_b2f_copy_32: + mov r12, r2, lsr #5 +.Lneon_b2f_copy_32_loop: + sub r1, r1, #32 + sub r0, r0, #32 + vld1.32 {q0,q1}, [r1] + subs r12, r12, #1 + vst1.32 {q0,q1}, [r0] + bne .Lneon_b2f_copy_32_loop + ands r2, r2, #0x1f + beq .Lneon_memmove_done +.Lneon_b2f_copy_finish: +.Lneon_b2f_copy_8: + movs r12, r2, lsr #0x3 + beq .Lneon_b2f_copy_4 +.Lneon_b2f_copy_8_loop: + sub r1, r1, #8 + sub r0, r0, #8 + vld1.32 {d0}, [r1] + subs r12, r12, #1 + vst1.32 {d0}, [r0] + bne .Lneon_b2f_copy_8_loop + ands r2, r2, #0x7 + beq .Lneon_memmove_done +.Lneon_b2f_copy_4: + movs r12, r2, lsr #0x2 + beq .Lneon_b2f_copy_1 +.Lneon_b2f_copy_4_loop: + ldr r3, [r1, #-4]! + subs r12, r12, #1 + str r3, [r0, #-4]! + bne .Lneon_b2f_copy_4_loop + ands r2, r2, #0x3 + nop +.Lneon_b2f_copy_1: + cmp r2, #0 + beq .Lneon_memmove_done +.Lneon_b2f_copy_1_loop: + ldrb r12, [r1, #-1]! + subs r2, r2, #1 + strb r12, [r0, #-1]! + bne .Lneon_b2f_copy_1_loop + +.Lneon_memmove_done: + pop {r0} + bx lr + + .end +#endif /* SCORPION_NEON_OPTIMIZATION */ + diff --git a/libc/arch-arm/bionic/memset.S b/libc/arch-arm/bionic/memset.S index 273b9e3..c386e7e 100644 --- a/libc/arch-arm/bionic/memset.S +++ b/libc/arch-arm/bionic/memset.S @@ -2,6 +2,8 @@ * Copyright (C) 2008 The Android Open Source Project * All rights reserved. * + * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. 
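Both memmove implementations above open the same way: compute dst - src, and if that unsigned distance is at least the copy length a forward copy cannot clobber unread source bytes, so they tail-call memcpy; otherwise they copy back-to-front (clamping each pass to the overlap window). The dispatch in C (a sketch of the entry logic only, not the windowed NEON copies):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Entry dispatch equivalent to 'subs r12, r0, r1; cmphi r2, r12; bls memcpy':
     * unsigned wraparound makes dst < src look like a huge distance, which is
     * exactly the forward-safe case. (Subtracting pointers into unrelated
     * buffers is technically UB in C; the asm works on raw integers, which is
     * why this is only a sketch.) */
    static void *memmove_sketch(void *dstv, const void *srcv, size_t n)
    {
        uint8_t *dst = dstv;
        const uint8_t *src = srcv;

        if ((size_t)(dst - src) >= n)
            return memcpy(dst, src, n);  /* no destructive overlap */
        while (n--)
            dst[n] = src[n];             /* overlap: copy back-to-front */
        return dstv;
    }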
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,6 +29,90 @@ */ #include <machine/asm.h> + +#if( defined(SCORPION_NEON_OPTIMIZATION) || defined(CORTEX_CACHE_LINE_32)) + .code 32 + .align 8 + .global memset + .type memset, %function + + .global bzero + .type bzero, %function + +bzero: + mov r2, r1 + mov r1, #0 +memset: + push {r0} + + cmp r2, #6 + bgt .Lmemset_gt6 + cmp r2, #0 + beq .Lmemset_smallcopy_done +.Lmemset_smallcopy_loop: + strb r1, [r0], #1 + subs r2, r2, #1 + bne .Lmemset_smallcopy_loop +.Lmemset_smallcopy_done: + pop {r0} + bx lr + +.Lmemset_gt6: + vdup.8 q0, r1 + vmov r1, s0 + + /* + * Decide where to route for the maximum copy sizes. + */ + cmp r2, #4 + blt .Lmemset_lt4 + cmp r2, #16 + blt .Lmemset_lt16 + vmov q1, q0 + cmp r2, #128 + blt .Lmemset_32 +.Lmemset_128: + mov r12, r2, lsr #7 +.Lmemset_128_loop: + vst1.32 {q0, q1}, [r0]! + vst1.32 {q0, q1}, [r0]! + vst1.32 {q0, q1}, [r0]! + vst1.32 {q0, q1}, [r0]! + subs r12, r12, #1 + bne .Lmemset_128_loop + ands r2, r2, #0x7f + beq .Lmemset_end +.Lmemset_32: + movs r12, r2, lsr #5 + beq .Lmemset_lt32 +.Lmemset_32_loop: + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + bne .Lmemset_32_loop + ands r2, r2, #0x1f + beq .Lmemset_end +.Lmemset_lt32: + cmp r2, #16 + blt .Lmemset_lt16 + vst1.64 {q0}, [r0]! + subs r2, r2, #16 + beq .Lmemset_end +.Lmemset_lt16: + movs r12, r2, lsl #29 + strcs r1, [r0], #4 + strcs r1, [r0], #4 + strmi r1, [r0], #4 +.Lmemset_lt4: + movs r2, r2, lsl #31 + strcsh r1, [r0], #2 + strmib r1, [r0] +.Lmemset_end: + pop {r0} + bx lr + + .end +#else /* !(SCORPION_NEON_OPTIMIZATION || CORTEX_CACHE_LINE_32) */ + /* * Optimized memset() for ARM. @@ -107,3 +193,5 @@ ENTRY(memset) ldmfd sp!, {r0, r4-r7, lr} bx lr END(memset) + +#endif /* SCORPION_NEON_OPTIMIZATION */ diff --git a/libc/arch-arm/bionic/strlen-armv7.S b/libc/arch-arm/bionic/strlen-armv7.S new file mode 100644 index 0000000..125e92f --- /dev/null +++ b/libc/arch-arm/bionic/strlen-armv7.S @@ -0,0 +1,111 @@ +/* Copyright (c) 2010-2011, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Written by Dave Gilbert <david.gilbert@linaro.org> + + This strlen routine is optimised on a Cortex-A9 and should work on + all ARMv7 processors. This routine is reasonably fast for short + strings, but is probably slower than a simple implementation if all + your strings are very short */ + +@ 2011-02-08 david.gilbert@linaro.org +@ Extracted from local git 6848613a + + +@ this lets us check a flag in a 00/ff byte easily in either endianness +#ifdef __ARMEB__ +#define CHARTSTMASK(c) 1<<(31-(c*8)) +#else +#define CHARTSTMASK(c) 1<<(c*8) +#endif + +@----------------------------------------------------------------------------------------------------------------------------- + .syntax unified + .arch armv7-a + + .thumb_func + .align 2 + .p2align 4,,15 + .global strlen + .type strlen,%function +strlen: + @ r0 = string + @ returns count of bytes in string not including terminator + mov r1, r0 + push { r4,r6 } + mvns r6, #0 @ all F + movs r4, #0 + tst r0, #7 + beq 2f + +1: + ldrb r2, [r1], #1 + tst r1, #7 @ Hit alignment yet? + cbz r2, 10f @ Exit if we found the 0 + bne 1b + + @ So we're now aligned +2: + ldmia r1!,{r2,r3} + uadd8 r2, r2, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r2, r4, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + uadd8 r3, r3, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r3, r2, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + cmp r3, #0 + beq 2b + +strlenendtmp: + @ One (or more) of the bytes we loaded was 0 - but which one? + @ r2 has the mask corresponding to the first loaded word + @ r3 has a combined mask of the two words - but if r2 was all-non 0 + @ then it's just the 2nd words + cmp r2, #0 + itte eq + moveq r2, r3 @ the end is in the 2nd word + subeq r1,r1,#3 + subne r1,r1,#7 + + @ r1 currently points to the 2nd byte of the word containing the 0 + tst r2, # CHARTSTMASK(0) @ 1st character + bne 10f + adds r1,r1,#1 + tst r2, # CHARTSTMASK(1) @ 2nd character + ittt eq + addeq r1,r1,#1 + tsteq r2, # (3<<15) @ 2nd & 3rd character + @ If not the 3rd must be the last one + addeq r1,r1,#1 + +10: + @ r0 is still at the beginning, r1 is pointing 1 byte after the terminator + sub r0, r1, r0 + subs r0, r0, #1 + pop { r4, r6 } + bx lr diff --git a/libc/arch-arm/include/machine/kernel_user_helper.h b/libc/arch-arm/include/machine/kernel_user_helper.h new file mode 100644 index 0000000..8836c50 --- /dev/null +++ b/libc/arch-arm/include/machine/kernel_user_helper.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
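The Linaro strlen above examines eight bytes per iteration: uadd8 adds 0xFF to every byte lane so the GE flags end up set exactly for the non-zero bytes, and sel then assembles a word that is 0xFF precisely where a source byte was zero. The portable form of word-at-a-time NUL detection is the classic mask expression below (a sketch; the aligned word loads rely on the same cannot-cross-a-page argument as the asm):

    #include <stddef.h>
    #include <stdint.h>

    /* Nonzero iff some byte of x is zero: subtracting 1 from each byte borrows
     * into bit 7 only when the byte was 0x00, and '& ~x' filters out bytes
     * that already had bit 7 set. */
    static inline uint32_t has_zero_byte(uint32_t x)
    {
        return (x - 0x01010101u) & ~x & 0x80808080u;
    }

    static size_t strlen_sketch(const char *s)
    {
        const char *p = s;
        while ((uintptr_t)p & 3) {          /* byte-scan up to word alignment */
            if (*p == '\0')
                return (size_t)(p - s);
            p++;
        }
        const uint32_t *w = (const uint32_t *)p;
        while (!has_zero_byte(*w))          /* then scan a word at a time */
            w++;
        p = (const char *)w;
        while (*p)                          /* locate the NUL inside the word */
            p++;
        return (size_t)(p - s);
    }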
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _ARCH_ARM_KERNEL_USER_HELPER_H +#define _ARCH_ARM_KERNEL_USER_HELPER_H + +extern int clock_gettime_syscall(int clk_id, struct timespec *tp); +extern int gettimeofday_syscall(struct timeval *tv, struct timezone *tz); + +#define __kuser_gtod_base (*(int32_t *)0xffff0f40) +#define __kuser_gtod_cycle_last (*(int32_t *)0xffff0f40) +#define __kuser_gtod_mask (*(int32_t *)0xffff0f48) +#define __kuser_gtod_mult (*(int32_t *)0xffff0f50) +#define __kuser_gtod_shift (*(int32_t *)0xffff0f54) +#define __kuser_gtod_tv_sec (*(int32_t *)0xffff0f58) +#define __kuser_gtod_tv_nsec (*(int32_t *)0xffff0f5c) + +#define __kuser_gtod_seqnum (*(int32_t *)0xffff0f28) +#define __kuser_gtod_offset (*(int32_t *)0xffff0f30) +#define __kuser_gtod_cycle_base 0xfffef000 +#define __kuser_gtod_feature (*(int32_t *)0xffff0f34) +#define __kuser_gtod_feature_flag 0xffff0f20 + +#define __kuser_gtod_wtm_tv_sec (*(int32_t *)0xffff0f38) +#define __kuser_gtod_wtm_tv_nsec (*(int32_t *)0xffff0f3c) + +#define __kuser_gtod_timezone (*(int32_t *)0xffff0f20) +#define __kuser_gtod_tz_minw (*(int32_t *)0xffff0f20) +#define __kuser_gtod_tz_dst (*(int32_t *)0xffff0f24) + +struct gtod_t { + uint64_t cycle_last; + uint64_t mask; + uint32_t mult; + uint32_t shift; + uint32_t tv_sec; + uint32_t tv_nsec; +}; + +#define NSEC_PER_SEC 1000000000L + +#endif diff --git a/libc/bionic/md5.c b/libc/bionic/md5.c index ba4aaed..02785bd 100644 --- a/libc/bionic/md5.c +++ b/libc/bionic/md5.c @@ -231,7 +231,7 @@ MD5_Update (struct md5 *m, const void *v, size_t len) } calc(m, current); #else - calc(m, (u_int32_t*)m->save); + calc(m, m->save32); #endif offset = 0; } diff --git a/libc/bionic/md5.h b/libc/bionic/md5.h index a381994..079ed84 100644 --- a/libc/bionic/md5.h +++ b/libc/bionic/md5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995 - 2001 Kungliga Tekniska Högskolan + * Copyright (c) 1995 - 2001 Kungliga Tekniska H?gskolan * (Royal Institute of Technology, Stockholm, Sweden). * All rights reserved. 
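The md5.c change above (calc(m, m->save32)) pairs with the md5.h hunk below: instead of casting the unsigned char save[64] buffer to u_int32_t*, the struct gains a union so the same 64 bytes can be read as 16 words without the strict-aliasing violation the old cast committed. The pattern in isolation (a sketch):

    #include <stdint.h>

    /* Union-based type punning: GCC and Clang define reading a union member
     * other than the one last written, so this is the safe replacement for
     * the old '(u_int32_t *)m->save' cast. */
    union block {
        unsigned char bytes[64];
        uint32_t      words[16];
    };

    static uint32_t first_word(const union block *b)
    {
        return b->words[0];   /* bytes[0..3] reinterpreted as one word */
    }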
* @@ -40,7 +40,10 @@ struct md5 { unsigned int sz[2]; u_int32_t counter[4]; - unsigned char save[64]; + union { + unsigned char save[64]; + u_int32_t save32[16]; + }; }; typedef struct md5 MD5_CTX; diff --git a/libc/bionic/sha1.c b/libc/bionic/sha1.c index efa95a5..7384812 100644 --- a/libc/bionic/sha1.c +++ b/libc/bionic/sha1.c @@ -23,10 +23,6 @@ #include <sha1.h> #include <string.h> -#if HAVE_NBTOOL_CONFIG_H -#include "nbtool_config.h" -#endif - #if !HAVE_SHA1_H #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) @@ -54,77 +50,16 @@ #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); typedef union { - u_char c[64]; - u_int l[16]; + uint8_t c[64]; + uint32_t l[16]; } CHAR64LONG16; -/* old sparc64 gcc could not compile this */ -#undef SPARC64_GCC_WORKAROUND -#if defined(__sparc64__) && defined(__GNUC__) && __GNUC__ < 3 -#define SPARC64_GCC_WORKAROUND -#endif - -#ifdef SPARC64_GCC_WORKAROUND -void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *); -void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *); -void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *); -void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *); - -#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i) -#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i) -#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i) -#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i) -#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i) - -void -do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block) -{ - nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2); nR0(c,d,e,a,b, 3); - nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5); nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7); - nR0(c,d,e,a,b, 8); nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11); - nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14); nR0(a,b,c,d,e,15); - nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17); nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19); -} - -void -do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block) -{ - nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22); nR2(c,d,e,a,b,23); - nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25); nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27); - nR2(c,d,e,a,b,28); nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31); - nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34); nR2(a,b,c,d,e,35); - nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37); nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39); -} - -void -do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block) -{ - nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42); nR3(c,d,e,a,b,43); - nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45); nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47); - nR3(c,d,e,a,b,48); nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51); - nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54); nR3(a,b,c,d,e,55); - nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57); nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59); -} - -void -do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block) -{ - nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62); nR4(c,d,e,a,b,63); - nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65); nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67); - nR4(c,d,e,a,b,68); nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71); - nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74); nR4(a,b,c,d,e,75); - nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77); 
nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79); -} -#endif - /* * Hash a single 512-bit block. This is the core of the algorithm. */ -void SHA1Transform(state, buffer) - u_int32_t state[5]; - const u_char buffer[64]; +void SHA1Transform(uint32_t state[5], const uint8_t buffer[64]) { - u_int32_t a, b, c, d, e; + uint32_t a, b, c, d, e; CHAR64LONG16 *block; #ifdef SHA1HANDSOFF @@ -148,12 +83,6 @@ void SHA1Transform(state, buffer) d = state[3]; e = state[4]; -#ifdef SPARC64_GCC_WORKAROUND - do_R01(&a, &b, &c, &d, &e, block); - do_R2(&a, &b, &c, &d, &e, block); - do_R3(&a, &b, &c, &d, &e, block); - do_R4(&a, &b, &c, &d, &e, block); -#else /* 4 rounds of 20 operations each. Loop unrolled. */ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); @@ -175,7 +104,6 @@ void SHA1Transform(state, buffer) R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); -#endif /* Add the working vars back into context.state[] */ state[0] += a; @@ -192,10 +120,8 @@ void SHA1Transform(state, buffer) /* * SHA1Init - Initialize new context */ -void SHA1Init(context) - SHA1_CTX *context; +void SHA1Init(SHA1_CTX *context) { - assert(context != 0); /* SHA1 initialization constants */ @@ -211,12 +137,9 @@ void SHA1Init(context) /* * Run your data through this. */ -void SHA1Update(context, data, len) - SHA1_CTX *context; - const u_char *data; - u_int len; +void SHA1Update(SHA1_CTX *context, const uint8_t *data, unsigned int len) { - u_int i, j; + unsigned int i, j; assert(context != 0); assert(data != 0); @@ -241,28 +164,26 @@ void SHA1Update(context, data, len) /* * Add padding and return the message digest. */ -void SHA1Final(digest, context) - u_char digest[20]; - SHA1_CTX* context; +void SHA1Final(uint8_t digest[20], SHA1_CTX *context) { - u_int i; - u_char finalcount[8]; + unsigned int i; + uint8_t finalcount[8]; assert(digest != 0); assert(context != 0); for (i = 0; i < 8; i++) { - finalcount[i] = (u_char)((context->count[(i >= 4 ? 0 : 1)] + finalcount[i] = (uint8_t)((context->count[(i >= 4 ? 
0 : 1)] >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ } - SHA1Update(context, (const u_char *)"\200", 1); + SHA1Update(context, (const uint8_t *)"\200", 1); while ((context->count[0] & 504) != 448) - SHA1Update(context, (const u_char *)"\0", 1); + SHA1Update(context, (const uint8_t *)"\0", 1); SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ if (digest) { for (i = 0; i < 20; i++) - digest[i] = (u_char) + digest[i] = (uint8_t) ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); } } diff --git a/libc/bionic/system_properties.c b/libc/bionic/system_properties.c index caa5ca6..756ee3f 100644 --- a/libc/bionic/system_properties.c +++ b/libc/bionic/system_properties.c @@ -158,7 +158,10 @@ int __system_property_get(const char *name, char *value) static int send_prop_msg(prop_msg *msg) { struct pollfd pollfds[1]; - struct sockaddr_un addr; + union { + struct sockaddr_un addr; + struct sockaddr addr_g; + } addr; socklen_t alen; size_t namelen; int s; @@ -172,11 +175,11 @@ static int send_prop_msg(prop_msg *msg) memset(&addr, 0, sizeof(addr)); namelen = strlen(property_service_socket); - strlcpy(addr.sun_path, property_service_socket, sizeof addr.sun_path); - addr.sun_family = AF_LOCAL; + strlcpy(addr.addr.sun_path, property_service_socket, sizeof addr.addr.sun_path); + addr.addr.sun_family = AF_LOCAL; alen = namelen + offsetof(struct sockaddr_un, sun_path) + 1; - if(TEMP_FAILURE_RETRY(connect(s, (struct sockaddr *) &addr, alen)) < 0) { + if(TEMP_FAILURE_RETRY(connect(s, &addr.addr_g, alen) < 0)) { close(s); return result; } diff --git a/libc/include/errno.h b/libc/include/errno.h index e1b15c0..d3b0506 100644 --- a/libc/include/errno.h +++ b/libc/include/errno.h @@ -45,6 +45,7 @@ __BEGIN_DECLS extern int __set_errno(int error); /* internal function returning the address of the thread-specific errno */ +__attribute__((const)) extern volatile int* __errno(void); /* a macro expanding to the errno l-value */ diff --git a/libc/include/netinet/in6.h b/libc/include/netinet/in6.h index 7f3286a..ba24b6c 100644 --- a/libc/include/netinet/in6.h +++ b/libc/include/netinet/in6.h @@ -31,28 +31,28 @@ #include <linux/in6.h> #define IN6_IS_ADDR_UNSPECIFIED(a) \ - ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) + (((a)->s6_addr32[0] == 0) && \ + ((a)->s6_addr32[1] == 0) && \ + ((a)->s6_addr32[2] == 0) && \ + ((a)->s6_addr32[3] == 0)) #define IN6_IS_ADDR_LOOPBACK(a) \ - ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) + (((a)->s6_addr32[0] == 0) && \ + ((a)->s6_addr32[1] == 0) && \ + ((a)->s6_addr32[2] == 0) && \ + ((a)->s6_addr32[3] == ntohl(1))) #define IN6_IS_ADDR_V4COMPAT(a) \ - ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1))) + (((a)->s6_addr32[0] == 0) && \ + ((a)->s6_addr32[1] == 0) && \ + ((a)->s6_addr32[2] == 0) && \ + ((a)->s6_addr32[3] != 0) && \ + ((a)->s6_addr32[3] != 
ntohl(1))) #define IN6_IS_ADDR_V4MAPPED(a) \ - ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff))) + (((a)->s6_addr32[0] == 0) && \ + ((a)->s6_addr32[1] == 0) && \ + ((a)->s6_addr32[2] == ntohl(0x0000ffff))) #define IN6_IS_ADDR_LINKLOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) @@ -65,7 +65,7 @@ (((a)->s6_addr[0] & 0xfe) == 0xfc) #define IN6_IS_ADDR_MULTICAST(a) \ - (((__const uint8_t *) (a))[0] == 0xff) + ((a)->s6_addr[0] == 0xff) #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 diff --git a/libc/include/pthread.h b/libc/include/pthread.h index 2015ac0..af0cc5f 100644 --- a/libc/include/pthread.h +++ b/libc/include/pthread.h @@ -146,6 +146,7 @@ void pthread_exit(void * retval); int pthread_join(pthread_t thid, void ** ret_val); int pthread_detach(pthread_t thid); +__attribute__((const)) pthread_t pthread_self(void); int pthread_equal(pthread_t one, pthread_t two); diff --git a/libc/include/resolv.h b/libc/include/resolv.h index 7c34012..221410d 100644 --- a/libc/include/resolv.h +++ b/libc/include/resolv.h @@ -40,7 +40,7 @@ __BEGIN_DECLS struct res_state; -extern struct __res_state *__res_state(void); +extern struct __res_state *__res_state(void) __attribute__((const)); #define _res (*__res_state()) /* Base-64 functions - because some code expects it there */ diff --git a/libc/include/sha1.h b/libc/include/sha1.h index f7ada46..adfa1fc 100644 --- a/libc/include/sha1.h +++ b/libc/include/sha1.h @@ -18,14 +18,14 @@ typedef struct { uint32_t state[5]; uint32_t count[2]; - u_char buffer[64]; + uint8_t buffer[64]; } SHA1_CTX; __BEGIN_DECLS -void SHA1Transform(uint32_t[5], const u_char[64]); +void SHA1Transform(uint32_t[5], const uint8_t[64]); void SHA1Init(SHA1_CTX *); -void SHA1Update(SHA1_CTX *, const u_char *, u_int); -void SHA1Final(u_char[SHA1_DIGEST_LENGTH], SHA1_CTX *); +void SHA1Update(SHA1_CTX *, const uint8_t *, unsigned int); +void SHA1Final(uint8_t[SHA1_DIGEST_LENGTH], SHA1_CTX *); __END_DECLS #endif /* _SYS_SHA1_H_ */ diff --git a/libc/include/string.h b/libc/include/string.h index 06e2284..2ed74e8 100644 --- a/libc/include/string.h +++ b/libc/include/string.h @@ -224,6 +224,39 @@ size_t strlen(const char *s) { return __strlen_chk(s, bos); } +__purefunc extern char* __strchr_real(const char *, int) + __asm__(__USER_LABEL_PREFIX__ "strchr"); +extern char* __strchr_chk(const char *, int, size_t); + +__BIONIC_FORTIFY_INLINE +char* strchr(const char *s, int c) { + size_t bos = __builtin_object_size(s, 0); + + // Compiler doesn't know destination size. Don't call __strchr_chk + if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) { + return __strchr_real(s, c); + } + + return __strchr_chk(s, c, bos); +} + +__purefunc extern char* __strrchr_real(const char *, int) + __asm__(__USER_LABEL_PREFIX__ "strrchr"); +extern char* __strrchr_chk(const char *, int, size_t); + +__BIONIC_FORTIFY_INLINE +char* strrchr(const char *s, int c) { + size_t bos = __builtin_object_size(s, 0); + + // Compiler doesn't know destination size. 
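The string.h hunk above extends the file's existing FORTIFY_SOURCE scheme to strchr (and, continuing below, strrchr): when __builtin_object_size() can see the buffer's size at compile time, the call is diverted to a checked variant, added later in this patch in libc/string/strchr.c, that aborts on an over-read; otherwise the __asm__ label alias routes the call to the plain libc symbol. A minimal sketch of the checked walk, using hypothetical _demo names rather than bionic's real entry points:

```c
#include <stdio.h>
#include <stdlib.h>

/* Minimal sketch of the FORTIFY pattern above. The _demo name is
 * hypothetical; bionic's real entry points are __strchr_chk and the
 * asm-aliased __strchr_real. The checked variant scans at most s_len
 * bytes and aborts instead of reading past the buffer. */
static char *strchr_chk_demo(const char *p, int ch, size_t s_len) {
    for (;; ++p, s_len--) {
        if (s_len == 0) {
            fprintf(stderr, "*** strchr read beyond buffer ***\n");
            abort();
        }
        if (*p == (char) ch)
            return (char *) p;
        if (*p == '\0')
            return NULL;
    }
}

int main(void) {
    const char s[] = "bionic";
    printf("%s\n", strchr_chk_demo(s, 'n', sizeof(s))); /* prints "nic" */
    /* strchr_chk_demo("abc", 'x', 3) would abort: the terminator at
     * index 3 lies outside the stated 3-byte bound. */
    return 0;
}
```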
Don't call __strrchr_chk + if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) { + return __strrchr_real(s, c); + } + + return __strrchr_chk(s, c, bos); +} + + #endif /* defined(__BIONIC_FORTIFY_INLINE) */ __END_DECLS diff --git a/libc/kernel/arch-arm/asm/unistd.h b/libc/kernel/arch-arm/asm/unistd.h index 454ed89..b3d75ca 100644 --- a/libc/kernel/arch-arm/asm/unistd.h +++ b/libc/kernel/arch-arm/asm/unistd.h @@ -466,7 +466,7 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ -#ifdef __ARM_EABI__ +#if defined(__ARM_EABI__) && !defined(__KERNEL__) #undef __NR_time #undef __NR_umount #undef __NR_stime diff --git a/libc/kernel/common/linux/android_pmem.h b/libc/kernel/common/linux/android_pmem.h index 8c605e4..f463807 100644 --- a/libc/kernel/common/linux/android_pmem.h +++ b/libc/kernel/common/linux/android_pmem.h @@ -29,6 +29,11 @@ #define PMEM_CONNECT _IOW(PMEM_IOCTL_MAGIC, 6, unsigned int) #define PMEM_GET_TOTAL_SIZE _IOW(PMEM_IOCTL_MAGIC, 7, unsigned int) #define PMEM_CACHE_FLUSH _IOW(PMEM_IOCTL_MAGIC, 8, unsigned int) + +#define PMEM_CLEAN_INV_CACHES _IOW(PMEM_IOCTL_MAGIC, 11, unsigned int) + +#define PMEM_ALLOCATE_ALIGNED _IOW(PMEM_IOCTL_MAGIC, 15, unsigned int) + struct android_pmem_platform_data /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ { @@ -46,4 +51,16 @@ struct pmem_region { unsigned long len; }; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + +struct pmem_addr { + unsigned long vaddr; + unsigned long offset; + unsigned long length; +}; + +struct pmem_allocation { + unsigned long size; + unsigned int align; +}; + #endif diff --git a/libc/kernel/common/linux/ashmem.h b/libc/kernel/common/linux/ashmem.h index e402e4e..a24d75a 100644 --- a/libc/kernel/common/linux/ashmem.h +++ b/libc/kernel/common/linux/ashmem.h @@ -47,4 +47,6 @@ struct ashmem_pin { #define ASHMEM_GET_PIN_STATUS _IO(__ASHMEMIOC, 9) /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ASHMEM_PURGE_ALL_CACHES _IO(__ASHMEMIOC, 10) +#define ASHMEM_CACHE_FLUSH_RANGE _IO(__ASHMEMIOC, 11) + #endif diff --git a/libc/netbsd/gethnamaddr.c b/libc/netbsd/gethnamaddr.c index 9a9f6e2..055e9f2 100644 --- a/libc/netbsd/gethnamaddr.c +++ b/libc/netbsd/gethnamaddr.c @@ -653,14 +653,14 @@ gethostbyaddr(const void *addr, assert(addr != NULL); if (af == AF_INET6 && len == IN6ADDRSZ && - (IN6_IS_ADDR_LINKLOCAL((const struct in6_addr *)(const void *)uaddr) || - IN6_IS_ADDR_SITELOCAL((const struct in6_addr *)(const void *)uaddr))) { + (IN6_IS_ADDR_LINKLOCAL((const struct in6_addr *)addr) || + IN6_IS_ADDR_SITELOCAL((const struct in6_addr *)addr))) { h_errno = HOST_NOT_FOUND; return NULL; } if (af == AF_INET6 && len == IN6ADDRSZ && - (IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)(const void *)uaddr) || - IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)(const void *)uaddr))) { + (IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)addr) || + IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)addr))) { /* Unmap. 
*/ addr += IN6ADDRSZ - INADDRSZ; uaddr += IN6ADDRSZ - INADDRSZ; diff --git a/libc/netbsd/net/getaddrinfo.c b/libc/netbsd/net/getaddrinfo.c index 326b09c..bd29c5a 100644 --- a/libc/netbsd/net/getaddrinfo.c +++ b/libc/netbsd/net/getaddrinfo.c @@ -411,7 +411,10 @@ android_getaddrinfo_proxy( { int sock; const int one = 1; - struct sockaddr_un proxy_addr; + union { + struct sockaddr_un un; + struct sockaddr generic; + } proxy_addr; const char* cache_mode = getenv("ANDROID_DNS_MODE"); FILE* proxy = NULL; int success = 0; @@ -452,12 +455,12 @@ android_getaddrinfo_proxy( setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); memset(&proxy_addr, 0, sizeof(proxy_addr)); - proxy_addr.sun_family = AF_UNIX; - strlcpy(proxy_addr.sun_path, "/dev/socket/dnsproxyd", - sizeof(proxy_addr.sun_path)); + proxy_addr.un.sun_family = AF_UNIX; + strlcpy(proxy_addr.un.sun_path, "/dev/socket/dnsproxyd", + sizeof(proxy_addr.un.sun_path)); if (TEMP_FAILURE_RETRY(connect(sock, - (const struct sockaddr*) &proxy_addr, - sizeof(proxy_addr))) != 0) { + &proxy_addr.generic, + sizeof(proxy_addr.un))) != 0) { close(sock); return -1; } @@ -1547,7 +1550,7 @@ _get_scope(const struct sockaddr *addr) /* RFC 4380, section 2.6 */ #define IN6_IS_ADDR_TEREDO(a) \ - ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == ntohl(0x20010000))) + (((a)->s6_addr32[0]) == ntohl(0x20010000)) /* RFC 3056, section 2. */ #define IN6_IS_ADDR_6TO4(a) \ diff --git a/libc/netbsd/net/getnameinfo.c b/libc/netbsd/net/getnameinfo.c index d8ac037..da9d7e3 100644 --- a/libc/netbsd/net/getnameinfo.c +++ b/libc/netbsd/net/getnameinfo.c @@ -147,7 +147,10 @@ android_gethostbyaddr_proxy(char* nameBuf, size_t nameBufLen, const void *addr, int sock; const int one = 1; - struct sockaddr_un proxy_addr; + union { + struct sockaddr_un un; + struct sockaddr generic; + } proxy_addr; const char* cache_mode = getenv("ANDROID_DNS_MODE"); FILE* proxy = NULL; int result = -1; @@ -175,11 +178,11 @@ android_gethostbyaddr_proxy(char* nameBuf, size_t nameBufLen, const void *addr, setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); memset(&proxy_addr, 0, sizeof(proxy_addr)); - proxy_addr.sun_family = AF_UNIX; - strlcpy(proxy_addr.sun_path, "/dev/socket/dnsproxyd", - sizeof(proxy_addr.sun_path)); - if (TEMP_FAILURE_RETRY(connect(sock, (const struct sockaddr*) (void*) &proxy_addr, - sizeof(proxy_addr))) != 0) { + proxy_addr.un.sun_family = AF_UNIX; + strlcpy(proxy_addr.un.sun_path, "/dev/socket/dnsproxyd", + sizeof(proxy_addr.un.sun_path)); + if (TEMP_FAILURE_RETRY(connect(sock, &proxy_addr.generic, + sizeof(proxy_addr.un))) != 0) { close(sock); return -1; } diff --git a/libc/netbsd/resolv/res_send.c b/libc/netbsd/resolv/res_send.c index f3ee539..028ffaf 100644 --- a/libc/netbsd/resolv/res_send.c +++ b/libc/netbsd/resolv/res_send.c @@ -404,7 +404,10 @@ res_nsend(res_state statp, */ if (EXT(statp).nscount != 0) { int needclose = 0; - struct sockaddr_storage peer; + union { + struct sockaddr_storage storage; + struct sockaddr generic; + } peer; socklen_t peerlen; if (EXT(statp).nscount != statp->nscount) @@ -420,13 +423,13 @@ res_nsend(res_state statp, if (EXT(statp).nssocks[ns] == -1) continue; - peerlen = sizeof(peer); + peerlen = sizeof(peer.storage); if (getpeername(EXT(statp).nssocks[ns], - (struct sockaddr *)(void *)&peer, &peerlen) < 0) { + &peer.generic, &peerlen) < 0) { needclose++; break; } - if (!sock_eq((struct sockaddr *)(void *)&peer, + if (!sock_eq(&peer.generic, get_nsaddr(statp, (size_t)ns))) { needclose++; break; @@ -750,12 +753,15 @@ 
send_vc(res_state statp, /* Are we still talking to whom we want to talk to? */ if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) { - struct sockaddr_storage peer; - socklen_t size = sizeof peer; + union { + struct sockaddr_storage storage; + struct sockaddr generic; + } peer; + socklen_t size = sizeof peer.storage; if (getpeername(statp->_vcsock, - (struct sockaddr *)(void *)&peer, &size) < 0 || - !sock_eq((struct sockaddr *)(void *)&peer, nsap)) { + &peer.generic, &size) < 0 || + !sock_eq(&peer.generic, nsap)) { res_nclose(statp); statp->_flags &= ~RES_F_VC; } @@ -1034,7 +1040,10 @@ send_dg(res_state statp, int nsaplen; struct timespec now, timeout, finish; fd_set dsmask; - struct sockaddr_storage from; + union { + struct sockaddr_storage storage; + struct sockaddr generic; + } from; socklen_t fromlen; int resplen, seconds, n, s; @@ -1126,9 +1135,9 @@ retry: return (0); } errno = 0; - fromlen = sizeof(from); + fromlen = sizeof(from.storage); resplen = recvfrom(s, (char*)ans, (size_t)anssiz,0, - (struct sockaddr *)(void *)&from, &fromlen); + &from.generic, &fromlen); if (resplen <= 0) { Perror(statp, stderr, "recvfrom", errno); res_nclose(statp); @@ -1162,7 +1171,7 @@ retry: goto retry; } if (!(statp->options & RES_INSECURE1) && - !res_ourserver_p(statp, (struct sockaddr *)(void *)&from)) { + !res_ourserver_p(statp, &from.generic)) { /* * response from wrong server? ignore it. * XXX - potential security hazard could diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h index 275c1c9..380c143 100644 --- a/libc/private/bionic_atomic_arm.h +++ b/libc/private/bionic_atomic_arm.h @@ -124,6 +124,11 @@ __bionic_memory_barrier(void) } #endif /* !ANDROID_SMP */ +/* LDREX/STREX routines broken on ARMv6 */ +# if __ARM_ARCH__ == 6 +# define BROKEN_REX +# endif + /* Compare-and-swap, without any explicit barriers. Note that this functions * returns 0 on success, and 1 on failure. The opposite convention is typically * used on other platforms. @@ -135,7 +140,7 @@ __bionic_memory_barrier(void) * * LDREX/STREX are only available starting from ARMv6 */ -#ifdef __ARM_HAVE_LDREX_STREX +#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX) __ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) { @@ -182,7 +187,7 @@ __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) * ARMv6+ => use LDREX/STREX * < ARMv6 => use SWP instead. */ -#ifdef __ARM_HAVE_LDREX_STREX +#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX) __ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) { @@ -216,7 +221,7 @@ __bionic_swap(int32_t new_value, volatile int32_t* ptr) /* Atomic increment - without any barriers * This returns the old value */ -#ifdef __ARM_HAVE_LDREX_STREX +#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX) __ATOMIC_INLINE__ int32_t __bionic_atomic_inc(volatile int32_t* ptr) { @@ -250,7 +255,7 @@ __bionic_atomic_inc(volatile int32_t* ptr) /* Atomic decrement - without any barriers * This returns the old value. */ -#ifdef __ARM_HAVE_LDREX_STREX +#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX) __ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) { diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h index 4658866..2456ebb 100644 --- a/libc/private/bionic_tls.h +++ b/libc/private/bionic_tls.h @@ -100,7 +100,9 @@ extern int __set_tls(void *ptr); * C library, because we don't know where the corresponding code * is going to run. 
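The system_properties.c, getaddrinfo.c, getnameinfo.c and res_send.c hunks above all apply the same fix: casting a struct sockaddr_un or sockaddr_storage to struct sockaddr* through void* is replaced by a union of the two types, so the compiler knows the two views alias and the code stays strict-aliasing clean. A compilable sketch of the idiom, with a hypothetical socket path:

```c
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

/* Minimal sketch of the union idiom used in the hunks above: the union
 * gives a struct sockaddr lvalue that legally aliases the sockaddr_un,
 * so connect() needs no pointer cast through void*. The socket path is
 * hypothetical. */
static int connect_local_demo(const char *path) {
    union {
        struct sockaddr_un un;
        struct sockaddr generic;
    } addr;
    int s = socket(AF_UNIX, SOCK_STREAM, 0);
    if (s < 0)
        return -1;

    memset(&addr, 0, sizeof(addr));
    addr.un.sun_family = AF_UNIX;
    strncpy(addr.un.sun_path, path, sizeof(addr.un.sun_path) - 1);

    if (connect(s, &addr.generic, sizeof(addr.un)) != 0) {
        close(s);
        return -1;
    }
    return s;
}

int main(void) {
    int fd = connect_local_demo("/tmp/demo.socket");
    if (fd >= 0)
        close(fd);
    return 0;
}
```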
*/ -# ifdef LIBC_STATIC +# if defined(LIBC_STATIC) || \ + (defined(__ARM_ARCH_6__) && defined(HAVE_ARM_TLS_REGISTER) && \ + !defined(__ARM_ARCH_6T2__)) /* Use the kernel helper in static C library. */ typedef volatile void* (__kernel_get_tls_t)(void); @@ -111,6 +113,12 @@ extern int __set_tls(void *ptr); * Note that HAVE_ARM_TLS_REGISTER is build-specific * (it must match your kernel configuration) */ +# ifdef HAVE_TEGRA_ERRATA_657451 +# define __munge_tls(_v) ( ((_v)&~((1ul<<20)|1ul)) | (((_v)&0x1)<<20) ) +# else +# define __munge_tls(_v) (_v) +#endif + # ifdef HAVE_ARM_TLS_REGISTER /* We can read the address directly from a coprocessor * register, which avoids touching the data cache @@ -119,6 +127,7 @@ extern int __set_tls(void *ptr); # define __get_tls() \ ({ register unsigned int __val asm("r0"); \ asm ("mrc p15, 0, r0, c13, c0, 3" : "=r"(__val) ); \ + __val = __munge_tls(__val); \ (volatile void*)__val; }) # else /* !HAVE_ARM_TLS_REGISTER */ /* The kernel provides the address of the TLS at a fixed diff --git a/libc/private/logd.h b/libc/private/logd.h index c81a91a..26878ba 100644 --- a/libc/private/logd.h +++ b/libc/private/logd.h @@ -29,6 +29,7 @@ #define _ANDROID_BIONIC_LOGD_H #include <stdarg.h> +#include <stdint.h> #define BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW 80100 #define BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW 80105 diff --git a/libc/string/strchr.c b/libc/string/strchr.c index 9b4332c..44516ef 100644 --- a/libc/string/strchr.c +++ b/libc/string/strchr.c @@ -29,11 +29,17 @@ */ #include <string.h> +#include <private/logd.h> char * -strchr(const char *p, int ch) +__strchr_chk(const char *p, int ch, size_t s_len) { - for (;; ++p) { + for (;; ++p, s_len--) { + if (s_len == 0) { + __libc_android_log_print(ANDROID_LOG_FATAL, "libc", + "*** FORTIFY_SOURCE strchr read beyond buffer ***\n"); + abort(); + } if (*p == (char) ch) return((char *)p); if (!*p) @@ -41,3 +47,8 @@ strchr(const char *p, int ch) } /* NOTREACHED */ } + +char * +strchr(const char *p, int ch) { + return __strchr_chk(p, ch, (size_t) -1); +} diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c index 10c07e6..fc3dc4e 100644 --- a/libc/string/strrchr.c +++ b/libc/string/strrchr.c @@ -29,13 +29,19 @@ */ #include <string.h> +#include <private/logd.h> char * -strrchr(const char *p, int ch) +__strrchr_chk(const char *p, int ch, size_t s_len) { char *save; - for (save = NULL;; ++p) { + for (save = NULL;; ++p, s_len--) { + if (s_len == 0) { + __libc_android_log_print(ANDROID_LOG_FATAL, "libc", + "*** FORTIFY_SOURCE strrchr read beyond buffer ***\n"); + abort(); + } if (*p == (char) ch) save = (char *)p; if (!*p) @@ -43,3 +49,9 @@ strrchr(const char *p, int ch) } /* NOTREACHED */ } + +char * +strrchr(const char *p, int ch) +{ + return __strrchr_chk(p, ch, (size_t) -1); +} diff --git a/libc/tools/zoneinfo/ZoneCompactor.java b/libc/tools/zoneinfo/ZoneCompactor.java index b657748..cc77c94 100644 --- a/libc/tools/zoneinfo/ZoneCompactor.java +++ b/libc/tools/zoneinfo/ZoneCompactor.java @@ -55,11 +55,13 @@ public class ZoneCompactor { InputStream in = new FileInputStream(inFile); byte[] buf = new byte[8192]; + int length = 0; while (true) { int nbytes = in.read(buf); if (nbytes == -1) { break; } + length += nbytes; out.write(buf, 0, nbytes); byte[] nret = new byte[ret.length + nbytes]; @@ -67,6 +69,8 @@ public class ZoneCompactor { System.arraycopy(buf, 0, nret, ret.length, nbytes); ret = nret; } + if (length%4 != 0) + out.write(new byte[] {00,00,00,00}, 0, 4 - length % 4); out.flush(); return ret; } @@ -105,6 +109,9 @@ public 
class ZoneCompactor { lengths.put(s, new Integer((int)length)); start += length; + if (start % 4 != 0) + start += 4 - start % 4; + byte[] data = copyFile(f, zoneInfo); TimeZone tz = ZoneInfo.make(s, data); diff --git a/libc/tools/zoneinfo/generate b/libc/tools/zoneinfo/generate index ab2617f..7017e90 100755 --- a/libc/tools/zoneinfo/generate +++ b/libc/tools/zoneinfo/generate @@ -92,7 +92,7 @@ def upgrade_to(ftp, filename): subprocess.check_call(['javac', '-d', '.', '%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir, '%s/ZoneInfo.java' % bionic_libc_tools_zoneinfo_dir]) - subprocess.check_call(['java', 'ZoneCompactor', 'setup', 'data']) + subprocess.check_call(['java', '-classpath', '.', 'ZoneCompactor', 'setup', 'data']) print 'Updating bionic from %s to %s...' % (current_tzdata_version(), version) # Move the .dat and .idx files... @@ -116,7 +116,8 @@ ftp.cwd('tz/releases') tzdata_filenames = [] for filename in ftp.nlst(): if filename.startswith('tzdata20'): - tzdata_filenames.append(filename) + if filename.endswith('tar.gz'): + tzdata_filenames.append(filename) tzdata_filenames.sort() # If you're several releases behind, we'll walk you through the upgrades one by one. diff --git a/libc/unistd/getopt_long.c b/libc/unistd/getopt_long.c index dbdf01a..0b8181a 100644 --- a/libc/unistd/getopt_long.c +++ b/libc/unistd/getopt_long.c @@ -100,12 +100,12 @@ static int nonopt_start = -1; /* first non option argument (for permute) */ static int nonopt_end = -1; /* first option after non options (for permute) */ /* Error messages */ -static const char recargchar[] = "option requires an argument -- %c"; -static const char recargstring[] = "option requires an argument -- %s"; -static const char ambig[] = "ambiguous option -- %.*s"; -static const char noarg[] = "option doesn't take an argument -- %.*s"; -static const char illoptchar[] = "unknown option -- %c"; -static const char illoptstring[] = "unknown option -- %s"; +static const char recargchar[] = "option requires an argument -- %c\n"; +static const char recargstring[] = "option requires an argument -- %s\n"; +static const char ambig[] = "ambiguous option -- %.*s\n"; +static const char noarg[] = "option doesn't take an argument -- %.*s\n"; +static const char illoptchar[] = "unknown option -- %c\n"; +static const char illoptstring[] = "unknown option -- %s\n"; /* * Compute the greatest common divisor of a and b. 
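The ZoneCompactor.java changes above pad both the copied file data and the recorded start offsets to 4-byte boundaries, so 32-bit fields in the concatenated zoneinfo.dat can be read with aligned loads once the file is mmap'd. The offset arithmetic, restated as a small C check:

```c
#include <assert.h>
#include <stddef.h>

/* Sketch of the alignment arithmetic used in ZoneCompactor above:
 * round an offset up to the next multiple of 4 so each appended
 * entry starts on a word boundary. */
static size_t align4(size_t offset) {
    if (offset % 4 != 0)
        offset += 4 - offset % 4;
    return offset;          /* equivalently: (offset + 3) & ~(size_t)3 */
}

int main(void) {
    assert(align4(0) == 0);
    assert(align4(5) == 8);
    assert(align4(8) == 8);
    return 0;
}
```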
diff --git a/libc/zoneinfo/zoneinfo.dat b/libc/zoneinfo/zoneinfo.dat Binary files differ index cb0507a..cd4b4cc 100644 --- a/libc/zoneinfo/zoneinfo.dat +++ b/libc/zoneinfo/zoneinfo.dat diff --git a/libc/zoneinfo/zoneinfo.idx b/libc/zoneinfo/zoneinfo.idx Binary files differ index c93b637..1f5f538 100644 --- a/libc/zoneinfo/zoneinfo.idx +++ b/libc/zoneinfo/zoneinfo.idx diff --git a/libc/zoneinfo/zoneinfo.version b/libc/zoneinfo/zoneinfo.version index 73bb417..0bbfa63 100644 --- a/libc/zoneinfo/zoneinfo.version +++ b/libc/zoneinfo/zoneinfo.version @@ -1 +1 @@ -2012h +2012j diff --git a/libm/Android.mk b/libm/Android.mk index 9c88798..a28f1b8 100644 --- a/libm/Android.mk +++ b/libm/Android.mk @@ -72,7 +72,6 @@ libm_common_src_files:= \ src/s_ceill.c \ src/s_copysign.c \ src/s_copysignf.c \ - src/s_cos.c \ src/s_cosf.c \ src/s_erf.c \ src/s_erff.c \ @@ -132,7 +131,6 @@ libm_common_src_files:= \ src/s_signgam.c \ src/s_significand.c \ src/s_significandf.c \ - src/s_sin.c \ src/s_sinf.c \ src/s_tan.c \ src/s_tanf.c \ @@ -162,6 +160,30 @@ ifeq ($(TARGET_ARCH),arm) src/s_scalbnf.c \ src/e_sqrtf.c + ifeq ($(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION),true) + libm_common_src_files += \ + arm/e_pow.S \ + arm/s_cos.S \ + arm/s_sin.S + libm_common_cflags += -DKRAIT_NEON_OPTIMIZATION -fno-if-conversion + else + libm_common_src_files += \ + src/s_cos.c \ + src/s_sin.c + endif + + ifeq ($(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION),true) + libm_common_src_files += \ + arm/e_pow.S + libm_common_cflags += -DSPARROW_NEON_OPTIMIZATION + endif + + ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true) + libm_common_src_files += \ + arm/e_pow.S + libm_common_cflags += -DSCORPION_NEON_OPTIMIZATION + endif + libm_common_includes = $(LOCAL_PATH)/arm endif @@ -182,7 +204,9 @@ ifeq ($(TARGET_ARCH),mips) src/s_scalbln.c \ src/s_scalbn.c \ src/s_scalbnf.c \ - src/e_sqrtf.c + src/e_sqrtf.c \ + src/s_sin.c \ + src/s_cos.c libm_common_includes = $(LOCAL_PATH)/mips # Need to build *rint* functions @@ -201,6 +225,8 @@ LOCAL_ARM_MODE := arm LOCAL_C_INCLUDES += $(libm_common_includes) LOCAL_CFLAGS := $(libm_common_cflags) +LOCAL_CFLAGS:= $(libm_common_cflags) + LOCAL_MODULE:= libm LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk @@ -221,6 +247,8 @@ LOCAL_ARM_MODE := arm LOCAL_C_INCLUDES += $(libm_common_includes) LOCAL_CFLAGS := $(libm_common_cflags) +LOCAL_CFLAGS:= $(libm_common_cflags) + LOCAL_MODULE:= libm LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S new file mode 100644 index 0000000..1e328f8 --- /dev/null +++ b/libm/arm/e_pow.S @@ -0,0 +1,443 @@ +@ Copyright (c) 2012, Code Aurora Forum. All rights reserved. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are +@ met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above +@ copyright notice, this list of conditions and the following +@ disclaimer in the documentation and/or other materials provided +@ with the distribution. +@ * Neither the name of Code Aurora Forum, Inc. nor the names of its +@ contributors may be used to endorse or promote products derived +@ from this software without specific prior written permission.
+@ +@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED +@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT +@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <machine/cpu-features.h> +#include <machine/asm.h> + +@ Values which exist the program lifetime: +#define HIGH_WORD_MASK d31 +#define EXPONENT_MASK d30 +#define int_1 d29 +#define double_1 d28 +@ sign and 2^int_n fixup: +#define expadjustment d7 +#define literals r10 +@ Values which exist within both polynomial implementations: +#define int_n d2 +#define int_n_low s4 +#define int_n_high s5 +#define double_n d3 +#define k1 d27 +#define k2 d26 +#define k3 d25 +#define k4 d24 +@ Values which cross the boundaries between polynomial implementations: +#define ss d16 +#define ss2 d17 +#define ss4 d18 +#define Result d0 +#define Return_hw r1 +#define Return_lw r0 +#define ylg2x d0 +@ Intermediate values only needed sometimes: +@ initial (sorted in approximate order of availability for overwriting): +#define x_hw r1 +#define x_lw r0 +#define y_hw r3 +#define y_lw r2 +#define x d0 +#define bp d4 +#define y d1 +@ log series: +#define u d19 +#define v d20 +#define lg2coeff d21 +#define bpa d5 +#define bpb d3 +#define lg2const d6 +#define xmantissa r8 +#define twoto1o5 r4 +#define twoto3o5 r5 +#define ix r6 +#define iEXP_MASK r7 +@ exp input setup: +#define twoto1o8mask d3 +#define twoto1o4mask d4 +#define twoto1o2mask d1 +#define ylg2x_round_offset d16 +#define ylg2x_temp d17 +#define yn_temp d18 +#define yn_round_offset d19 +#define ln2 d5 +@ Careful, overwriting HIGH_WORD_MASK, reset it if you need it again ... 
+#define rounded_exponent d31 +@ exp series: +#define k5 d23 +#define k6 d22 +#define k7 d21 +#define k8 d20 +#define ss3 d19 +@ overwrite double_1 (we're done with it by now) +#define k0 d28 +#define twoto1o4 d6 + +@instructions that gas doesn't like to encode correctly: +#define vmov_f64 fconstd +#define vmov_f32 fconsts +#define vmovne_f64 fconstdne + +ENTRY(pow_neon) +#if defined(KRAIT_NO_AAPCS_VFP_MODE) + @ ARM ABI has inputs coming in via r registers, lets move to a d register + vmov x, x_lw, x_hw +#endif + push {r4, r5, r6, r7, r8, r9, r10, lr} + + @ pre-staged bp values + vldr bpa, .LbpA + vldr bpb, .LbpB + @ load two fifths into constant term in case we need it due to offsets + vldr lg2const, .Ltwofifths + + @ bp is initially 1.0, may adjust later based on x value + vmov_f64 bp, #0x70 + + @ extract the mantissa from x for scaled value comparisons + lsl xmantissa, x_hw, #12 + + @ twoto1o5 = 2^(1/5) (input bracketing) + movw twoto1o5, #0x186c + movt twoto1o5, #0x2611 + @ twoto3o5 = 2^(3/5) (input bracketing) + movw twoto3o5, #0x003b + movt twoto3o5, #0x8406 + + @ finish extracting xmantissa + orr xmantissa, xmantissa, x_lw, lsr #20 + + @ begin preparing a mask for normalization + vmov.i64 HIGH_WORD_MASK, #0xffffffff00000000 + + @ double_1 = (double) 1.0 + vmov_f64 double_1, #0x70 + +#if defined(KRAIT_NO_AAPCS_VFP_MODE) + @ move y from r registers to a d register + vmov y, y_lw, y_hw +#endif + + cmp xmantissa, twoto1o5 + + vshl.i64 EXPONENT_MASK, HIGH_WORD_MASK, #20 + vshr.u64 int_1, HIGH_WORD_MASK, #63 + + adr literals, .LliteralTable + + bhi .Lxgt2to1over5 + @ zero out lg2 constant term if don't offset our input + vsub.f64 lg2const, lg2const, lg2const + b .Lxle2to1over5 + +.Lxgt2to1over5: + @ if normalized x > 2^(1/5), bp = 1 + (2^(2/5)-1) = 2^(2/5) + vadd.f64 bp, bp, bpa + +.Lxle2to1over5: + @ will need ln2 for various things + vldr ln2, .Lln2 + + cmp xmantissa, twoto3o5 +@@@@ X Value Normalization @@@@ + + @ ss = abs(x) 2^(-1024) + vbic.i64 ss, x, EXPONENT_MASK + + @ N = (floor(log2(x)) + 0x3ff) * 2^52 + vand.i64 int_n, x, EXPONENT_MASK + + bls .Lxle2to3over5 + @ if normalized x > 2^(3/5), bp = 2^(2/5) + (2^(4/5) - 2^(2/5) = 2^(4/5) + vadd.f64 bp, bp, bpb + vadd.f64 lg2const, lg2const, lg2const + +.Lxle2to3over5: + + @ load log2 polynomial series constants + vldm literals!, {k4, k3, k2, k1} + + @ s = abs(x) 2^(-floor(log2(x))) (normalize abs(x) to around 1) + vorr.i64 ss, ss, double_1 + +@@@@ 3/2 (Log(bp(1+s)/(1-s))) input computation (s = (x-bp)/(x+bp)) @@@@ + + vsub.f64 u, ss, bp + vadd.f64 v, ss, bp + + @ s = (x-1)/(x+1) + vdiv.f64 ss, u, v + + @ load 2/(3log2) into lg2coeff + vldr lg2coeff, .Ltwooverthreeln2 + + @ N = floor(log2(x)) * 2^52 + vsub.i64 int_n, int_n, double_1 + +@@@@ 3/2 (Log(bp(1+s)/(1-s))) polynomial series @@@@ + + @ ss2 = ((x-dp)/(x+dp))^2 + vmul.f64 ss2, ss, ss + @ ylg2x = 3.0 + vmov_f64 ylg2x, #8 + vmul.f64 ss4, ss2, ss2 + + @ todo: useful later for two-way clamp + vmul.f64 lg2coeff, lg2coeff, y + + @ N = floor(log2(x)) + vshr.s64 int_n, int_n, #52 + + @ k3 = ss^2 * L4 + L3 + vmla.f64 k3, ss2, k4 + + @ k1 = ss^2 * L2 + L1 + vmla.f64 k1, ss2, k2 + + @ scale ss by 2/(3 ln 2) + vmul.f64 lg2coeff, ss, lg2coeff + + @ ylg2x = 3.0 + s^2 + vadd.f64 ylg2x, ylg2x, ss2 + + vcvt.f64.s32 double_n, int_n_low + + @ k1 = s^4 (s^2 L4 + L3) + s^2 L2 + L1 + vmla.f64 k1, ss4, k3 + + @ add in constant term + vadd.f64 double_n, lg2const + + @ ylg2x = 3.0 + s^2 + s^4 (s^4 (s^2 L4 + L3) + s^2 L2 + L1) + vmla.f64 ylg2x, ss4, k1 + + @ ylg2x = y 2 s / (3 ln(2)) (3.0 + s^2 + s^4 
(s^4(s^2 L4 + L3) + s^2 L2 + L1) + vmul.f64 ylg2x, lg2coeff, ylg2x + +@@@@ Compute input to Exp(s) (s = y(n + log2(x)) - (floor(8 yn + 1)/8 + floor(8 ylog2(x) + 1)/8) @@@@@ + + @ mask to extract bit 1 (2^-2 from our fixed-point representation) + vshl.u64 twoto1o4mask, int_1, #1 + + @ double_n = y * n + vmul.f64 double_n, double_n, y + + @ Load 2^(1/4) for later computations + vldr twoto1o4, .Ltwoto1o4 + + @ either add or subtract one based on the sign of double_n and ylg2x + vshr.s64 ylg2x_round_offset, ylg2x, #62 + vshr.s64 yn_round_offset, double_n, #62 + + @ move unmodified y*lg2x into temp space + vmov ylg2x_temp, ylg2x + @ compute floor(8 y * n + 1)/8 + @ and floor(8 y (log2(x)) + 1)/8 + vcvt.s32.f64 ylg2x, ylg2x, #3 + @ move unmodified y*n into temp space + vmov yn_temp, double_n + vcvt.s32.f64 double_n, double_n, #3 + + @ load exp polynomial series constants + vldm literals!, {k8, k7, k6, k5, k4, k3, k2, k1} + + @ mask to extract bit 2 (2^-1 from our fixed-point representation) + vshl.u64 twoto1o2mask, int_1, #2 + + @ make rounding offsets either 1 or -1 instead of 0 or -2 + vorr.u64 ylg2x_round_offset, ylg2x_round_offset, int_1 + vorr.u64 yn_round_offset, yn_round_offset, int_1 + + @ round up to the nearest 1/8th + vadd.s32 ylg2x, ylg2x, ylg2x_round_offset + vadd.s32 double_n, double_n, yn_round_offset + + @ clear out round-up bit for y log2(x) + vbic.s32 ylg2x, ylg2x, int_1 + @ clear out round-up bit for yn + vbic.s32 double_n, double_n, int_1 + @ add together the (fixed precision) rounded parts + vadd.s64 rounded_exponent, double_n, ylg2x + @ turn int_n into a double with value 2^int_n + vshl.i64 int_n, rounded_exponent, #49 + @ compute masks for 2^(1/4) and 2^(1/2) fixups for fractional part of fixed-precision rounded values: + vand.u64 twoto1o4mask, twoto1o4mask, rounded_exponent + vand.u64 twoto1o2mask, twoto1o2mask, rounded_exponent + + @ convert back into floating point, double_n now holds (double) floor(8 y * n + 1)/8 + @ ylg2x now holds (double) floor(8 y * log2(x) + 1)/8 + vcvt.f64.s32 ylg2x, ylg2x, #3 + vcvt.f64.s32 double_n, double_n, #3 + + @ put the 2 bit (0.5) through the roof of twoto1o2mask (make it 0x0 or 0xffffffffffffffff) + vqshl.u64 twoto1o2mask, twoto1o2mask, #62 + @ put the 1 bit (0.25) through the roof of twoto1o4mask (make it 0x0 or 0xffffffffffffffff) + vqshl.u64 twoto1o4mask, twoto1o4mask, #63 + + @ center y*log2(x) fractional part between -0.125 and 0.125 by subtracting (double) floor(8 y * log2(x) + 1)/8 + vsub.f64 ylg2x_temp, ylg2x_temp, ylg2x + @ center y*n fractional part between -0.125 and 0.125 by subtracting (double) floor(8 y * n + 1)/8 + vsub.f64 yn_temp, yn_temp, double_n + + @ Add fractional parts of yn and y log2(x) together + vadd.f64 ss, ylg2x_temp, yn_temp + + @ Result = 1.0 (offset for exp(s) series) + vmov_f64 Result, #0x70 + + @ multiply fractional part of y * log2(x) by ln(2) + vmul.f64 ss, ln2, ss + +@@@@ 10th order polynomial series for Exp(s) @@@@ + + @ ss2 = (ss)^2 + vmul.f64 ss2, ss, ss + + @ twoto1o2mask = twoto1o2mask & twoto1o4 + vand.u64 twoto1o2mask, twoto1o2mask, twoto1o4 + @ twoto1o2mask = twoto1o2mask & twoto1o4 + vand.u64 twoto1o4mask, twoto1o4mask, twoto1o4 + + @ Result = 1.0 + ss + vadd.f64 Result, Result, ss + + @ k7 = ss k8 + k7 + vmla.f64 k7, ss, k8 + + @ ss4 = (ss*ss) * (ss*ss) + vmul.f64 ss4, ss2, ss2 + + @ twoto1o2mask = twoto1o2mask | (double) 1.0 - results in either 1.0 or 2^(1/4) in twoto1o2mask + vorr.u64 twoto1o2mask, twoto1o2mask, double_1 + @ twoto1o2mask = twoto1o4mask | (double) 1.0 - results in either 
1.0 or 2^(1/4) in twoto1o4mask + vorr.u64 twoto1o4mask, twoto1o4mask, double_1 + + @ TODO: should setup sign here, expadjustment = 1.0 + vmov_f64 expadjustment, #0x70 + + @ ss3 = (ss*ss) * ss + vmul.f64 ss3, ss2, ss + + @ k0 = 1/2 (first non-unity coefficient) + vmov_f64 k0, #0x60 + + @ Mask out non-exponent bits to make sure we have just 2^int_n + vand.i64 int_n, int_n, EXPONENT_MASK + + @ square twoto1o2mask to get 1.0 or 2^(1/2) + vmul.f64 twoto1o2mask, twoto1o2mask, twoto1o2mask + @ multiply twoto2o4mask into the exponent output adjustment value + vmul.f64 expadjustment, expadjustment, twoto1o4mask + + @ k5 = ss k6 + k5 + vmla.f64 k5, ss, k6 + + @ k3 = ss k4 + k3 + vmla.f64 k3, ss, k4 + + @ k1 = ss k2 + k1 + vmla.f64 k1, ss, k2 + + @ multiply twoto1o2mask into exponent output adjustment value + vmul.f64 expadjustment, expadjustment, twoto1o2mask + + @ k5 = ss^2 ( ss k8 + k7 ) + ss k6 + k5 + vmla.f64 k5, ss2, k7 + + @ k1 = ss^2 ( ss k4 + k3 ) + ss k2 + k1 + vmla.f64 k1, ss2, k3 + + @ Result = 1.0 + ss + 1/2 ss^2 + vmla.f64 Result, ss2, k0 + + @ Adjust int_n so that it's a double precision value that can be multiplied by Result + vadd.i64 expadjustment, int_n, expadjustment + + @ k1 = ss^4 ( ss^2 ( ss k8 + k7 ) + ss k6 + k5 ) + ss^2 ( ss k4 + k3 ) + ss k2 + k1 + vmla.f64 k1, ss4, k5 + + @ Result = 1.0 + ss + 1/2 ss^2 + ss^3 ( ss^4 ( ss^2 ( ss k8 + k7 ) + ss k6 + k5 ) + ss^2 ( ss k4 + k3 ) + ss k2 + k1 ) + vmla.f64 Result, ss3, k1 + + @ multiply by adjustment (sign*(rounding ? sqrt(2) : 1) * 2^int_n) + vmul.f64 Result, expadjustment, Result + +.LleavePow: +#if defined(KRAIT_NO_AAPCS_VFP_MODE) + @ return Result (FP) + vmov Return_lw, Return_hw, Result +#endif +.LleavePowDirect: + @ leave directly returning whatever is in Return_lw and Return_hw + pop {r4, r5, r6, r7, r8, r9, r10, pc} + +.align 6 +.LliteralTable: +@ Least-sqares tuned constants for 11th order (log2((1+s)/(1-s)): +.LL4: @ ~3/11 + .long 0x53a79915, 0x3fd1b108 +.LL3: @ ~1/3 + .long 0x9ca0567a, 0x3fd554fa +.LL2: @ ~3/7 + .long 0x1408e660, 0x3fdb6db7 +.LL1: @ ~3/5 + .long 0x332D4313, 0x3fe33333 + +@ Least-squares tuned constants for 10th order exp(s): +.LE10: @ ~1/3628800 + .long 0x25c7ba0a, 0x3e92819b +.LE9: @ ~1/362880 + .long 0x9499b49c, 0x3ec72294 +.LE8: @ ~1/40320 + .long 0xabb79d95, 0x3efa019f +.LE7: @ ~1/5040 + .long 0x8723aeaa, 0x3f2a019f +.LE6: @ ~1/720 + .long 0x16c76a94, 0x3f56c16c +.LE5: @ ~1/120 + .long 0x11185da8, 0x3f811111 +.LE4: @ ~1/24 + .long 0x5555551c, 0x3fa55555 +.LE3: @ ~1/6 + .long 0x555554db, 0x3fc55555 + +.LbpA: @ (2^(2/5) - 1) + .long 0x4ee54db1, 0x3fd472d1 + +.LbpB: @ (2^(4/5) - 2^(2/5)) + .long 0x1c8a36cf, 0x3fdafb62 + +.Ltwofifths: @ + .long 0x9999999a, 0x3fd99999 + +.Ltwooverthreeln2: + .long 0xDC3A03FD, 0x3FEEC709 + +.Lln2: @ ln(2) + .long 0xFEFA39EF, 0x3FE62E42 + +.Ltwoto1o4: @ 2^1/4 + .long 0x0a31b715, 0x3ff306fe +END(pow) diff --git a/libm/arm/s_cos.S b/libm/arm/s_cos.S new file mode 100644 index 0000000..30a6767 --- /dev/null +++ b/libm/arm/s_cos.S @@ -0,0 +1,419 @@ +@ Copyright (c) 2012, The Linux Foundation. All rights reserved. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are +@ met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. 
+@ * Redistributions in binary form must reproduce the above +@ copyright notice, this list of conditions and the following +@ disclaimer in the documentation and/or other materials provided +@ with the distribution. +@ * Neither the name of Code Aurora Forum, Inc. nor the names of its +@ contributors may be used to endorse or promote products derived +@ from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED +@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT +@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ Additional notices preserved for attributions purposes only. +@ +@ ==================================================== +@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +@ +@ Developed at SunSoft, a Sun Microsystems, Inc. business. +@ Permission to use, copy, modify, and distribute this +@ software is freely granted, provided that this notice +@ is preserved. +@ ==================================================== +@ +@ ==================================================== +@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +@ +@ Developed at SunPro, a Sun Microsystems, Inc. business. +@ Permission to use, copy, modify, and distribute this +@ software is freely granted, provided that this notice +@ is preserved. 
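For orientation before the long listings: the e_pow.S code above computes x**y as 2**(y*log2(x)). log2(x) comes from a polynomial in s = (x-bp)/(x+bp), with bp stepped through 1, 2^(2/5) and 2^(4/5) to keep s small, and the result is rebuilt from a 10th-order exp series after y*log2(x) is split, at 1/8 granularity, into an integer exponent and a small fraction (hence the 2^(1/4) and 2^(1/2) mask fixups). A rough C model of that flow, using plain libm calls in place of the tuned series and ignoring all special cases:

```c
#include <math.h>
#include <stdio.h>

/* Rough C model of the pow_neon flow above, with plain libm calls in
 * place of the tuned polynomial series and no special-case handling.
 * The assembly rounds y*log2(x) to the nearest 1/8 and patches the
 * fraction with 2^(1/4) and 2^(1/2) factors; this model simply splits
 * at the nearest integer. */
static double pow_model(double x, double y) {
    double t = y * log2(x);        /* series over s=(x-bp)/(x+bp) in the asm */
    double n = floor(t + 0.5);     /* lands in the result's exponent field */
    double f = t - n;              /* |f| <= 1/2, fed to the exp series */
    return ldexp(exp(f * M_LN2), (int) n);   /* 2^n * e^(f ln 2) = 2^t */
}

int main(void) {
    printf("model=%.17g libm=%.17g\n", pow_model(2.5, 1.5), pow(2.5, 1.5));
    return 0;
}
```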
+@ ==================================================== + +#include <machine/cpu-features.h> +#include <machine/asm.h> + +#define vmov_f64 fconstd + +ENTRY(cos) + push {r4, r6, r7, lr} + vmov d0, r0, r1 + mov r2, r0 + mov r3, r1 + movw r1, #0x21fb + movt r1, #0x3fe9 + mov r4, r3 + bic r3, r3, #0x80000000 + sub sp, sp, #48 + cmp r3, r1 + bgt .Lxgtpio4 + cmp r3, #0x3e400000 + bge .Lxnottiny + vcvt.s32.f64 s15, d0 + vmov r3, s15 + cmp r3, #0 + beq .Lreturnone +.Lxnottiny: + vmov.i64 d1, #0 + bl __kernel_cos +.Lleave_cos: + vmov r0, r1, d0 +.Lleave_cos_direct: + add sp, sp, #48 + pop {r4, r6, r7, pc} +.Lxgtpio4: + movw r2, #0xffff + movt r2, #0x7fef + cmp r3, r2 + bgt .LxisNaN + movw r0, #0xd97b + movt r0, #0x4002 + cmp r3, r0 + movw r2, #0x21fb + bgt .Lxge3pio4 + cmp r4, #0 + movt r2, #0x3ff9 + ble .Lsmallxisnegative + vldr d16, .Lpio2_1 + cmp r3, r2 + vsub.f64 d16, d0, d16 + beq .Lxnearpio2 + vldr d17, .Lpio2_1t +.Lfinalizesmallxremainder: + vsub.f64 d0, d16, d17 + vsub.f64 d16, d16, d0 + vstr d0, [sp, #8] + vsub.f64 d1, d16, d17 + vstr d1, [sp, #16] +.Lnmod3is1: + mov r0, #1 + bl __kernel_sin + vneg.f64 d0, d0 + b .Lleave_cos +.Lreturnone: + mov r0, #0 + movw r1, #0x0000 + movt r1, #0x3ff0 + vmov_f64 d0, #0x70 + b .Lleave_cos_direct +.LxisNaN: + vsub.f64 d0, d0, d0 + b .Lleave_cos +.Lxge3pio4: + movt r2, #0x4139 + cmp r3, r2 + bgt .Lxgigantic + vmov_f64 d3, #0x60 + vldr d2, .Linvpio2 + vldr d18, .Lpio2_1 + vabs.f64 d16, d0 + vmla.f64 d3, d16, d2 + vcvt.s32.f64 s3, d3 + vcvt.f64.s32 d17, s3 + vmov r0, s3 + cmp r0, #31 + vmls.f64 d16, d17, d18 + vldr d18, .Lpio2_1t + vmul.f64 d18, d17, d18 + bgt .Lcomputeremainder + ldr r2, .Lnpio2_hw_ptr + sub lr, r0, #1 +.LPICnpio2_hw0: + add r12, pc, r2 + ldr r1, [r12, lr, lsl #2] + cmp r3, r1 + beq .Lcomputeremainder +.Lfinishthirditeration: + vsub.f64 d0, d16, d18 + vstr d0, [sp, #8] +.Lfinishcomputingremainder: + vsub.f64 d16, d16, d0 + cmp r4, #0 + vsub.f64 d1, d16, d18 + vstr d1, [sp, #16] + blt .Lhandlenegativex +.Lselectregion: + and r0, r0, #3 + cmp r0, #1 + beq .Lnmod3is1 + cmp r0, #2 + beq .Lnmod3is2 + cmp r0, #0 + bne .Lnmod3is0 + bl __kernel_cos + b .Lleave_cos +.Lxgigantic: + asr r2, r3, #20 + vmov r6, r7, d0 + sub r2, r2, #1040 + mov r0, r6 + sub r2, r2, #6 + vldr d16, .Ltwo24 + sub r1, r3, r2, lsl #20 + vmov d18, r0, r1 + vcvt.s32.f64 s15, d18 + add r1, sp, #48 + mov r3, #3 + vcvt.f64.s32 d17, s15 + vsub.f64 d18, d18, d17 + vstr d17, [sp, #24] + vmul.f64 d18, d18, d16 + vcvt.s32.f64 s15, d18 + vcvt.f64.s32 d17, s15 + vsub.f64 d18, d18, d17 + vstr d17, [sp, #32] + vmul.f64 d16, d18, d16 + fcmpzd d16 + vstmdb r1!, {d16} + vmrs APSR_nzcv, fpscr + bne .Lprocessnonzeroterm +.Lskipzeroterms: + vldmdb r1!, {d16} + sub r3, r3, #1 + fcmpzd d16 + vmrs APSR_nzcv, fpscr + beq .Lskipzeroterms +.Lprocessnonzeroterm: + ldr r12, .Ltwo_over_pi_ptr + add r0, sp, #24 + add r1, sp, #8 +.LPICtwo_over_pi0: + add lr, pc, r12 + mov r12, #2 + str lr, [sp, #4] + str r12, [sp] + bl __kernel_rem_pio2 + cmp r4, #0 + vldr d0, [sp, #8] + blt .Lhandlenegativxalso + vldr d1, [sp, #16] + b .Lselectregion +.Lxnearpio2: + vldr d17, .Lpio2_2 + vsub.f64 d16, d16, d17 + vldr d17, .Lpio2_2t + b .Lfinalizesmallxremainder +.Lsmallxisnegative: + vldr d1, .Lpio2_1 + cmp r3, r2 + vadd.f64 d16, d0, d1 + beq .Lxnearnegpio2 + vldr d17, .Lpio2_1t +.Lfinalizesmallnegxremainder: + vadd.f64 d0, d16, d17 + vsub.f64 d16, d16, d0 + vstr d0, [sp, #8] + vadd.f64 d1, d16, d17 + vstr d1, [sp, #16] +.Lnmod3is0: + mov r0, #1 + bl __kernel_sin + b .Lleave_cos +.Lnmod3is2: + bl __kernel_cos + vneg.f64 
d0, d0 + b .Lleave_cos +.Lcomputeremainder: + vsub.f64 d0, d16, d18 + asr r1, r3, #20 + vmov r2, r3, d0 + ubfx r3, r3, #20, #11 + rsb r3, r3, r1 + vstr d0, [sp, #8] + cmp r3, #16 + ble .Lfinishcomputingremainder + vldr d18, .Lpio2_2 + vmul.f64 d20, d17, d18 + vsub.f64 d19, d16, d20 + vsub.f64 d16, d16, d19 + vsub.f64 d18, d16, d20 + vldr d16, .Lpio2_2t + vnmls.f64 d18, d17, d16 + vsub.f64 d0, d19, d18 + vmov r2, r3, d0 + ubfx r3, r3, #20, #11 + rsb r1, r3, r1 + vstr d0, [sp, #8] + cmp r1, #49 + ble .Lfinishseconditeration + vldr d5, .Lpio2_3 + vmul.f64 d20, d17, d5 + vsub.f64 d16, d19, d20 + vsub.f64 d4, d19, d16 + vldr d19, .Lpio2_3t + vsub.f64 d18, d4, d20 + vnmls.f64 d18, d17, d19 + b .Lfinishthirditeration +.Lhandlenegativex: + vneg.f64 d0, d0 + rsb r0, r0, #0 + vneg.f64 d1, d1 + vstr d0, [sp, #8] + vstr d1, [sp, #16] + b .Lselectregion +.Lfinishseconditeration: + vmov d16, d19 + b .Lfinishcomputingremainder +.Lxnearnegpio2: + vldr d0, .Lpio2_2 + vldr d17, .Lpio2_2t + vadd.f64 d16, d16, d0 + b .Lfinalizesmallnegxremainder +.Lhandlenegativxalso: + vldr d6, [sp, #16] + vneg.f64 d0, d0 + rsb r0, r0, #0 + vneg.f64 d1, d6 + vstr d0, [sp, #8] + vstr d1, [sp, #16] + b .Lselectregion + +.align 3 +.Lpio2_1: + .word 0x54400000, 0x3ff921fb +.Lpio2_1t: + .word 0x1a626331, 0x3dd0b461 +.Linvpio2: + .word 0x6dc9c883, 0x3fe45f30 +.Ltwo24: + .word 0x00000000, 0x41700000 +.Lpio2_2: + .word 0x1a600000, 0x3dd0b461 +.Lpio2_2t: + .word 0x2e037073, 0x3ba3198a +.Lpio2_3: + .word 0x2e000000, 0x3ba3198a +.Lpio2_3t: + .word 0x252049c1, 0x397b839a +.Lnpio2_hw_ptr: + .word .Lnpio2_hw-(.LPICnpio2_hw0+8) +.Ltwo_over_pi_ptr: + .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8) +END(cos) + + .section .rodata.npio2_hw,"a",%progbits + .align 2 +.Lnpio2_hw = . + 0 + .type npio2_hw, %object + .size npio2_hw, 128 +npio2_hw: + .word 0x3ff921fb + .word 0x400921fb + .word 0x4012d97c + .word 0x401921fb + .word 0x401f6a7a + .word 0x4022d97c + .word 0x4025fdbb + .word 0x402921fb + .word 0x402c463a + .word 0x402f6a7a + .word 0x4031475c + .word 0x4032d97c + .word 0x40346b9c + .word 0x4035fdbb + .word 0x40378fdb + .word 0x403921fb + .word 0x403ab41b + .word 0x403c463a + .word 0x403dd85a + .word 0x403f6a7a + .word 0x40407e4c + .word 0x4041475c + .word 0x4042106c + .word 0x4042d97c + .word 0x4043a28c + .word 0x40446b9c + .word 0x404534ac + .word 0x4045fdbb + .word 0x4046c6cb + .word 0x40478fdb + .word 0x404858eb + .word 0x404921fb + + .section .rodata.two_over_pi,"a",%progbits + .align 2 +.Ltwo_over_pi = . 
+ 0 + .type two_over_pi, %object + .size two_over_pi, 264 +two_over_pi: + .word 0x00a2f983 + .word 0x006e4e44 + .word 0x001529fc + .word 0x002757d1 + .word 0x00f534dd + .word 0x00c0db62 + .word 0x0095993c + .word 0x00439041 + .word 0x00fe5163 + .word 0x00abdebb + .word 0x00c561b7 + .word 0x00246e3a + .word 0x00424dd2 + .word 0x00e00649 + .word 0x002eea09 + .word 0x00d1921c + .word 0x00fe1deb + .word 0x001cb129 + .word 0x00a73ee8 + .word 0x008235f5 + .word 0x002ebb44 + .word 0x0084e99c + .word 0x007026b4 + .word 0x005f7e41 + .word 0x003991d6 + .word 0x00398353 + .word 0x0039f49c + .word 0x00845f8b + .word 0x00bdf928 + .word 0x003b1ff8 + .word 0x0097ffde + .word 0x0005980f + .word 0x00ef2f11 + .word 0x008b5a0a + .word 0x006d1f6d + .word 0x00367ecf + .word 0x0027cb09 + .word 0x00b74f46 + .word 0x003f669e + .word 0x005fea2d + .word 0x007527ba + .word 0x00c7ebe5 + .word 0x00f17b3d + .word 0x000739f7 + .word 0x008a5292 + .word 0x00ea6bfb + .word 0x005fb11f + .word 0x008d5d08 + .word 0x00560330 + .word 0x0046fc7b + .word 0x006babf0 + .word 0x00cfbc20 + .word 0x009af436 + .word 0x001da9e3 + .word 0x0091615e + .word 0x00e61b08 + .word 0x00659985 + .word 0x005f14a0 + .word 0x0068408d + .word 0x00ffd880 + .word 0x004d7327 + .word 0x00310606 + .word 0x001556ca + .word 0x0073a8c9 + .word 0x0060e27b + .word 0x00c08c6b diff --git a/libm/arm/s_sin.S b/libm/arm/s_sin.S new file mode 100644 index 0000000..9c3366c --- /dev/null +++ b/libm/arm/s_sin.S @@ -0,0 +1,414 @@ +@ Copyright (c) 2012, The Linux Foundation. All rights reserved. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are +@ met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above +@ copyright notice, this list of conditions and the following +@ disclaimer in the documentation and/or other materials provided +@ with the distribution. +@ * Neither the name of Code Aurora Forum, Inc. nor the names of its +@ contributors may be used to endorse or promote products derived +@ from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED +@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT +@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ Additional notices preserved for attributions purposes only. +@ +@ ==================================================== +@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +@ +@ Developed at SunSoft, a Sun Microsystems, Inc. business. +@ Permission to use, copy, modify, and distribute this +@ software is freely granted, provided that this notice +@ is preserved. 
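s_cos.S above and s_sin.S below share one skeleton: tiny arguments return immediately, arguments up to pi/4 go straight to the kernels, and everything else is reduced modulo pi/2, using the split pio2_1/pio2_2/pio2_3 constants for moderate inputs or __kernel_rem_pio2 with the 24-bit two_over_pi digits for huge ones, before dispatching on the quadrant n mod 4 at .Lselectregion. The dispatch, modeled in C for cosine:

```c
#include <math.h>
#include <stdio.h>

/* C model of the quadrant dispatch in .Lselectregion: once the argument
 * is reduced to r = x - n*pi/2, cos(x) is one of +-cos(r) or +-sin(r).
 * The assembly keeps the reduced argument in two doubles (head + tail)
 * for accuracy; this model keeps just one, so it is only approximate. */
static double cos_model(double x) {
    double n = floor(x / M_PI_2 + 0.5);
    double r = x - n * M_PI_2;
    switch ((long) n & 3) {
        case 0:  return  cos(r);
        case 1:  return -sin(r);
        case 2:  return -cos(r);
        default: return  sin(r);   /* n mod 4 == 3 */
    }
}

int main(void) {
    printf("model=%.17g libm=%.17g\n", cos_model(10.0), cos(10.0));
    return 0;
}
```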
+@ ==================================================== +@ +@ ==================================================== +@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +@ +@ Developed at SunPro, a Sun Microsystems, Inc. business. +@ Permission to use, copy, modify, and distribute this +@ software is freely granted, provided that this notice +@ is preserved. +@ ==================================================== + +#include <machine/cpu-features.h> +#include <machine/asm.h> + +#define vmov_f64 fconstd + +ENTRY(sin) + push {r4, r6, r7, lr} + vmov d0, r0, r1 + mov r2, r0 + mov r3, r1 + movw r1, #0x21fb + movt r1, #0x3fe9 + mov r4, r3 + bic r3, r3, #0x80000000 + sub sp, sp, #48 + cmp r3, r1 + bgt .Lxgtpio4 + cmp r3, #0x3e400000 + bge .Lxnottiny + vcvt.s32.f64 s15, d0 + vmov r3, s15 + cmp r3, #0 + bne .Lxnottiny +.Lleave_sin: + vmov r0, r1, d0 + add sp, sp, #48 + pop {r4, r6, r7, pc} +.Lxgtpio4: + movw r2, #0xffff + movt r2, #0x7fef + cmp r3, r2 + bgt .LxisNaN + movw r0, #0xd97b + movt r0, #0x4002 + cmp r3, r0 + movw r2, #0x21fb + bgt .Lxge3pio4 + cmp r4, #0 + movt r2, #0x3ff9 + ble .Lsmallxisnegative + vldr d16, .Lpio2_1 + cmp r3, r2 + vsub.f64 d16, d0, d16 + beq .Lxnearpio2 + vldr d17, .Lpio2_1t +.Lfinalizesmallxremainder: + vsub.f64 d0, d16, d17 + vsub.f64 d16, d16, d0 + vstr d0, [sp, #8] + vsub.f64 d1, d16, d17 + vstr d1, [sp, #16] +.Lnmod3is1: + bl __kernel_cos + b .Lleave_sin +.Lxnottiny: + vmov.i64 d1, #0 + mov r0, #0 + bl __kernel_sin + b .Lleave_sin +.LxisNaN: + vsub.f64 d0, d0, d0 + b .Lleave_sin +.Lxge3pio4: + movt r2, #0x4139 + cmp r3, r2 + bgt .Lxgigantic + vmov_f64 d3, #0x60 + vldr d2, .Linvpio2 + vldr d18, .Lpio2_1 + vabs.f64 d16, d0 + vmla.f64 d3, d16, d2 + vcvt.s32.f64 s3, d3 + vcvt.f64.s32 d17, s3 + vmov r0, s3 + cmp r0, #31 + vmls.f64 d16, d17, d18 + vldr d18, .Lpio2_1t + vmul.f64 d18, d17, d18 + bgt .Lcomputeremainder + ldr r2, .Lnpio2_hw_ptr + sub lr, r0, #1 +.LPICnpio2_hw0: + add r12, pc, r2 + ldr r1, [r12, lr, lsl #2] + cmp r3, r1 + beq .Lcomputeremainder +.Lfinishthirditeration: + vsub.f64 d0, d16, d18 + vstr d0, [sp, #8] +.Lfinishcomputingremainder: + vsub.f64 d16, d16, d0 + cmp r4, #0 + vsub.f64 d1, d16, d18 + vstr d1, [sp, #16] + blt .Lhandlenegativex +.Lselectregion: + and r0, r0, #3 + cmp r0, #1 + beq .Lnmod3is1 + cmp r0, #2 + beq .Lnmod3is2 + cmp r0, #0 + bne .Lnmod3is0 + mov r0, #1 + bl __kernel_sin + b .Lleave_sin +.Lxgigantic: + asr r2, r3, #20 + vmov r6, r7, d0 + sub r2, r2, #1040 + mov r0, r6 + sub r2, r2, #6 + vldr d16, .Ltwo24 + sub r1, r3, r2, lsl #20 + vmov d18, r0, r1 + vcvt.s32.f64 s15, d18 + add r1, sp, #48 + mov r3, #3 + vcvt.f64.s32 d17, s15 + vsub.f64 d18, d18, d17 + vstr d17, [sp, #24] + vmul.f64 d18, d18, d16 + vcvt.s32.f64 s15, d18 + vcvt.f64.s32 d17, s15 + vsub.f64 d18, d18, d17 + vstr d17, [sp, #32] + vmul.f64 d16, d18, d16 + fcmpzd d16 + vstmdb r1!, {d16} + vmrs APSR_nzcv, fpscr + bne .Lprocessnonzeroterm +.Lskipzeroterms: + vldmdb r1!, {d16} + sub r3, r3, #1 + fcmpzd d16 + vmrs APSR_nzcv, fpscr + beq .Lskipzeroterms +.Lprocessnonzeroterm: + ldr r12, .Ltwo_over_pi_ptr + add r0, sp, #24 + add r1, sp, #8 +.LPICtwo_over_pi0: + add lr, pc, r12 + mov r12, #2 + str lr, [sp, #4] + str r12, [sp] + bl __kernel_rem_pio2 + cmp r4, #0 + vldr d0, [sp, #8] + blt .Lhandlenegativexalso + vldr d1, [sp, #16] + b .Lselectregion +.Lxnearpio2: + vldr d17, .Lpio2_2 + vsub.f64 d16, d16, d17 + vldr d17, .Lpio2_2t + b .Lfinalizesmallxremainder +.Lsmallxisnegative: + vldr d1, .Lpio2_1 + cmp r3, r2 + vadd.f64 d16, d0, d1 + beq .Lxnearnegpio2 + vldr d17, 
.Lpio2_1t +.Lfinalizesmallnegxremainder: + vadd.f64 d0, d16, d17 + vsub.f64 d16, d16, d0 + vstr d0, [sp, #8] + vadd.f64 d1, d16, d17 + vstr d1, [sp, #16] +.Lnmod3is0: + bl __kernel_cos + vneg.f64 d0, d0 + b .Lleave_sin +.Lnmod3is2: + mov r0, #1 + bl __kernel_sin + vneg.f64 d0, d0 + b .Lleave_sin +.Lcomputeremainder: + vsub.f64 d0, d16, d18 + asr r1, r3, #20 + vmov r2, r3, d0 + ubfx r3, r3, #20, #11 + rsb r3, r3, r1 + vstr d0, [sp, #8] + cmp r3, #16 + ble .Lfinishcomputingremainder + vldr d18, .Lpio2_2 + vmul.f64 d20, d17, d18 + vsub.f64 d19, d16, d20 + vsub.f64 d16, d16, d19 + vsub.f64 d18, d16, d20 + vldr d16, .Lpio2_2t + vnmls.f64 d18, d17, d16 + vsub.f64 d0, d19, d18 + vmov r2, r3, d0 + ubfx r3, r3, #20, #11 + rsb r1, r3, r1 + vstr d0, [sp, #8] + cmp r1, #49 + ble .Lfinishseconditeration + vldr d5, .Lpio2_3 + vmul.f64 d20, d17, d5 + vsub.f64 d16, d19, d20 + vsub.f64 d4, d19, d16 + vldr d19, .Lpio2_3t + vsub.f64 d18, d4, d20 + vnmls.f64 d18, d17, d19 + b .Lfinishthirditeration +.Lhandlenegativex: + vneg.f64 d0, d0 + rsb r0, r0, #0 + vneg.f64 d1, d1 + vstr d0, [sp, #8] + vstr d1, [sp, #16] + b .Lselectregion +.Lfinishseconditeration: + vmov d16, d19 + b .Lfinishcomputingremainder +.Lxnearnegpio2: + vldr d0, .Lpio2_2 + vldr d17, .Lpio2_2t + vadd.f64 d16, d16, d0 + b .Lfinalizesmallnegxremainder +.Lhandlenegativexalso: + vldr d6, [sp, #16] + vneg.f64 d0, d0 + rsb r0, r0, #0 + vneg.f64 d1, d6 + vstr d0, [sp, #8] + vstr d1, [sp, #16] + b .Lselectregion + +.align 3 +.Lpio2_1: + .word 0x54400000, 0x3ff921fb +.Lpio2_1t: + .word 0x1a626331, 0x3dd0b461 +.Linvpio2: + .word 0x6dc9c883, 0x3fe45f30 +.Ltwo24: + .word 0x00000000, 0x41700000 +.Lpio2_2: + .word 0x1a600000, 0x3dd0b461 +.Lpio2_2t: + .word 0x2e037073, 0x3ba3198a +.Lpio2_3: + .word 0x2e000000, 0x3ba3198a +.Lpio2_3t: + .word 0x252049c1, 0x397b839a +.Lnpio2_hw_ptr: + .word .Lnpio2_hw-(.LPICnpio2_hw0+8) +.Ltwo_over_pi_ptr: + .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8) +END(sin) + + .section .rodata.npio2_hw,"a",%progbits + .align 2 +.Lnpio2_hw = . + 0 + .type npio2_hw, %object + .size npio2_hw, 128 +npio2_hw: + .word 0x3ff921fb + .word 0x400921fb + .word 0x4012d97c + .word 0x401921fb + .word 0x401f6a7a + .word 0x4022d97c + .word 0x4025fdbb + .word 0x402921fb + .word 0x402c463a + .word 0x402f6a7a + .word 0x4031475c + .word 0x4032d97c + .word 0x40346b9c + .word 0x4035fdbb + .word 0x40378fdb + .word 0x403921fb + .word 0x403ab41b + .word 0x403c463a + .word 0x403dd85a + .word 0x403f6a7a + .word 0x40407e4c + .word 0x4041475c + .word 0x4042106c + .word 0x4042d97c + .word 0x4043a28c + .word 0x40446b9c + .word 0x404534ac + .word 0x4045fdbb + .word 0x4046c6cb + .word 0x40478fdb + .word 0x404858eb + .word 0x404921fb + + .section .rodata.two_over_pi,"a",%progbits + .align 2 +.Ltwo_over_pi = . 
+ 0 + .type two_over_pi, %object + .size two_over_pi, 264 +two_over_pi: + .word 0x00a2f983 + .word 0x006e4e44 + .word 0x001529fc + .word 0x002757d1 + .word 0x00f534dd + .word 0x00c0db62 + .word 0x0095993c + .word 0x00439041 + .word 0x00fe5163 + .word 0x00abdebb + .word 0x00c561b7 + .word 0x00246e3a + .word 0x00424dd2 + .word 0x00e00649 + .word 0x002eea09 + .word 0x00d1921c + .word 0x00fe1deb + .word 0x001cb129 + .word 0x00a73ee8 + .word 0x008235f5 + .word 0x002ebb44 + .word 0x0084e99c + .word 0x007026b4 + .word 0x005f7e41 + .word 0x003991d6 + .word 0x00398353 + .word 0x0039f49c + .word 0x00845f8b + .word 0x00bdf928 + .word 0x003b1ff8 + .word 0x0097ffde + .word 0x0005980f + .word 0x00ef2f11 + .word 0x008b5a0a + .word 0x006d1f6d + .word 0x00367ecf + .word 0x0027cb09 + .word 0x00b74f46 + .word 0x003f669e + .word 0x005fea2d + .word 0x007527ba + .word 0x00c7ebe5 + .word 0x00f17b3d + .word 0x000739f7 + .word 0x008a5292 + .word 0x00ea6bfb + .word 0x005fb11f + .word 0x008d5d08 + .word 0x00560330 + .word 0x0046fc7b + .word 0x006babf0 + .word 0x00cfbc20 + .word 0x009af436 + .word 0x001da9e3 + .word 0x0091615e + .word 0x00e61b08 + .word 0x00659985 + .word 0x005f14a0 + .word 0x0068408d + .word 0x00ffd880 + .word 0x004d7327 + .word 0x00310606 + .word 0x001556ca + .word 0x0073a8c9 + .word 0x0060e27b + .word 0x00c08c6b diff --git a/libm/src/e_pow.c b/libm/src/e_pow.c index d213132..bd82f30 100644 --- a/libm/src/e_pow.c +++ b/libm/src/e_pow.c @@ -61,6 +61,14 @@ static char rcsid[] = "$FreeBSD: src/lib/msun/src/e_pow.c,v 1.11 2005/02/04 18:2 #include "math.h" #include "math_private.h" +#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION) || defined(SCORPION_NEON_OPTIMIZATION) +#if defined(KRAIT_NO_AAPCS_VFP_MODE) +double pow_neon(double x, double y); +#else +double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp"))); +#endif +#endif + static const double bp[] = {1.0, 1.5,}, dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ @@ -108,12 +116,32 @@ __ieee754_pow(double x, double y) ix = hx&0x7fffffff; iy = hy&0x7fffffff; /* y==zero: x**0 = 1 */ - if((iy|ly)==0) return one; - /* +-NaN return x+y */ - if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || - iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) - return x+y; + if (ly == 0) { + if (hy == ly) { + /* y==0.0, x**0 = 1 */ + return one; + } + else if (iy > 0x7ff00000) { + /* y is NaN, return x+y (NaN) */ + return x+y; + } + } + else if (iy >= 0x7ff00000) { + /* y is NaN, return x+y (NaN) */ + return x+y; + } + + if (lx == 0) { + if (ix > 0x7ff00000) { + /* x is NaN, return x+y (NaN) */ + return x+y; + } + } + else if (ix >= 0x7ff00000) { + /* x is NaN, return x+y (NaN) */ + return x+y; + } /* determine if y is an odd int when x < 0 * yisint = 0 ... 
@@ -201,6 +229,14 @@ __ieee754_pow(double x, double y)
         t1 = u+v;
         SET_LOW_WORD(t1,0);
         t2 = v-(t1-u);
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION) || defined(SCORPION_NEON_OPTIMIZATION)
+    } else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) {
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+        return pow_neon(x,y);
+#else
+        return pow_neon(x,y,lx,hx);
+#endif
+#endif
     } else {
         double ss,s2,s_h,s_l,t_h,t_l;
         n = 0;
diff --git a/libm/src/k_cos.c b/libm/src/k_cos.c
index 00916d7..b8cdf8f 100644
--- a/libm/src/k_cos.c
+++ b/libm/src/k_cos.c
@@ -69,6 +69,17 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 double
 __kernel_cos(double x, double y)
 {
+#if defined(KRAIT_NEON_OPTIMIZATION)
+    double hz,z,zz,r,w,k;
+
+    z = x*x;
+    zz = z*z;
+    k = x*y;
+    hz = (float)0.5*z;
+    r = z*(z*(C1+z*(C2+z*((C3+z*C4)+zz*(C5+z*C6)))));
+    w = one-hz;
+    return w + (((one-w)-hz) + (r-k));
+#else
     double hz,z,r,w;
 
     z = x*x;
@@ -76,4 +87,5 @@ __kernel_cos(double x, double y)
     hz = (float)0.5*z;
     w = one-hz;
     return w + (((one-w)-hz) + (z*r-x*y));
+#endif
 }
diff --git a/libm/src/k_sin.c b/libm/src/k_sin.c
index ae06a9d..ee641d4 100644
--- a/libm/src/k_sin.c
+++ b/libm/src/k_sin.c
@@ -60,6 +60,16 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 double
 __kernel_sin(double x, double y, int iy)
 {
+#if defined(KRAIT_NEON_OPTIMIZATION)
+    double z,zz,r,v;
+
+    z = x*x;
+    zz = z*z;
+    v = z*x;
+    r = S2+z*((S3+z*S4)+zz*(S5+z*S6));
+    if(iy==0) return x+v*(S1+z*r);
+    else return x-((z*(half*y-v*r)-y)-v*S1);
+#else
     double z,r,v;
 
     z = x*x;
@@ -67,4 +77,5 @@ __kernel_sin(double x, double y, int iy)
     r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
     if(iy==0) return x+v*(S1+z*r);
     else return x-((z*(half*y-v*r)-y)-v*S1);
+#endif
 }
diff --git a/libm/src/math_private.h b/libm/src/math_private.h
index 5f6e088..7cda2e9 100644
--- a/libm/src/math_private.h
+++ b/libm/src/math_private.h
@@ -257,11 +257,19 @@ cpackl(long double x, long double y)
 #define __ieee754_ldexpf ldexpf
 
 /* fdlibm kernel function */
+#if defined(KRAIT_NEON_OPTIMIZATION)
+int __ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp")));
+double __kernel_sin(double,double,int) __attribute__((pcs("aapcs-vfp")));
+double __kernel_cos(double,double) __attribute__((pcs("aapcs-vfp")));
+double __kernel_tan(double,double,int) __attribute__((pcs("aapcs-vfp")));
+int __kernel_rem_pio2(double*,double*,int,int,int,const int*) __attribute__((pcs("aapcs-vfp")));
+#else
 int __ieee754_rem_pio2(double,double*);
 double __kernel_sin(double,double,int);
 double __kernel_cos(double,double);
 double __kernel_tan(double,double,int);
 int __kernel_rem_pio2(double*,double*,int,int,int,const int*);
+#endif
 
 /* float versions of fdlibm kernel functions */
 int __ieee754_rem_pio2f(float,float*);
diff --git a/linker/Android.mk b/linker/Android.mk
index e8c81db..19f75c8 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -43,6 +43,9 @@ endif
 ifeq ($(TARGET_ARCH),mips)
     LOCAL_CFLAGS += -DANDROID_MIPS_LINKER
 endif
+ifeq ($(TARGET_HAVE_TEGRA_ERRATA_657451),true)
+    LOCAL_CFLAGS += -DHAVE_TEGRA_ERRATA_657451
+endif
 
 LOCAL_MODULE:= linker
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 46d1335..2362099 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -641,33 +641,35 @@ static int open_library(const char *name)
     return -1;
 }
 
-// Returns 'true' if the library is prelinked or on failure so we error out
-// either way. We no longer support prelinking.
-static bool is_prelinked(int fd, const char* name)
+typedef struct {
+    long mmap_addr;
+    char tag[4]; /* 'P', 'R', 'E', ' ' */
+} prelink_info_t;
+
+/* Returns the requested base address if the library is prelinked,
+ * and 0 otherwise. */
+static unsigned long
+is_prelinked(int fd, const char *name)
 {
-    struct prelink_info_t {
-        long mmap_addr;
-        char tag[4]; // "PRE ".
-    };
-
     off_t sz = lseek(fd, -sizeof(prelink_info_t), SEEK_END);
     if (sz < 0) {
-        DL_ERR("lseek failed: %s", strerror(errno));
-        return true;
+        DL_ERR("lseek() failed!");
+        return 0;
     }
 
     prelink_info_t info;
     int rc = TEMP_FAILURE_RETRY(read(fd, &info, sizeof(info)));
     if (rc != sizeof(info)) {
-        DL_ERR("could not read prelink_info_t structure for \"%s\":", name, strerror(errno));
-        return true;
+        WARN("Could not read prelink_info_t structure for `%s`\n", name);
+        return 0;
     }
 
-    if (memcmp(info.tag, "PRE ", 4) == 0) {
-        DL_ERR("prelinked libraries no longer supported: %s", name);
-        return true;
+    if (memcmp(info.tag, "PRE ", 4)) {
+        WARN("`%s` is not a prelinked library\n", name);
+        return 0;
     }
-    return false;
+
+    return (unsigned long)info.mmap_addr;
 }
 
 /* verify_elf_header
@@ -781,10 +783,18 @@ static soinfo* load_library(const char* name)
         return NULL;
     }
 
-    // We no longer support pre-linked libraries.
-    if (is_prelinked(fd.fd, name)) {
+    unsigned req_base = (unsigned) is_prelinked(fd.fd, name);
+    if (req_base == (unsigned)-1) {
+        DL_ERR("%5d can't read end of library: %s: %s", pid, name,
+               strerror(errno));
         return NULL;
     }
+    if (req_base != 0) {
+        TRACE("[ %5d - Prelinked library '%s' requesting base @ 0x%08x ]\n",
+              pid, name, req_base);
+    } else {
+        TRACE("[ %5d - Non-prelinked library '%s' found. ]\n", pid, name);
+    }
 
     // Reserve address space for all loadable segments.
     void* load_start = NULL;
@@ -792,6 +802,7 @@
     Elf32_Addr load_bias = 0;
     ret = phdr_table_reserve_memory(phdr_table,
                                     phdr_count,
+                                    req_base,
                                     &load_start,
                                     &load_size,
                                     &load_bias);
diff --git a/linker/linker_phdr.c b/linker/linker_phdr.c
index 250ca20..36f848b 100644
--- a/linker/linker_phdr.c
+++ b/linker/linker_phdr.c
@@ -218,6 +218,8 @@ Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
  * Input:
  *   phdr_table    -> program header table
  *   phdr_count    -> number of entries in the tables
+ *   required_base -> for prelinked libraries, mandatory load address
+ *                    of the first loadable segment. 0 otherwise.
 * Output:
 *   load_start -> first page of reserved address space range
 *   load_size  -> size in bytes of reserved address space range
@@ -229,18 +231,22 @@ Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
 int
 phdr_table_reserve_memory(const Elf32_Phdr* phdr_table,
                           size_t phdr_count,
+                          Elf32_Addr required_base,
                           void** load_start,
                           Elf32_Addr* load_size,
                           Elf32_Addr* load_bias)
 {
     Elf32_Addr size = phdr_table_get_load_size(phdr_table, phdr_count);
+
     if (size == 0) {
         errno = EINVAL;
         return -1;
     }
 
     int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-    void* start = mmap(NULL, size, PROT_NONE, mmap_flags, -1, 0);
+    if (required_base != 0)
+        mmap_flags |= MAP_FIXED;
+    void* start = mmap((void*)required_base, size, PROT_NONE, mmap_flags, -1, 0);
     if (start == MAP_FAILED) {
        return -1;
     }
diff --git a/linker/linker_phdr.h b/linker/linker_phdr.h
index a759262..19e281b 100644
--- a/linker/linker_phdr.h
+++ b/linker/linker_phdr.h
@@ -61,6 +61,7 @@ phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
 int
 phdr_table_reserve_memory(const Elf32_Phdr* phdr_table,
                           size_t phdr_count,
+                          Elf32_Addr required_base,
                           void** load_start,
                           Elf32_Addr* load_size,
                           Elf32_Addr* load_bias);
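
A note on the argument reduction in the sin.S code above: for arguments of moderate magnitude it subtracts n*pi/2 in stages, using a leading part of pi/2 whose mantissa ends in zero bits (.Lpio2_1) so the first multiply-subtract is exact, then folding in progressively smaller tails (.Lpio2_1t, .Lpio2_2/.Lpio2_2t, .Lpio2_3/.Lpio2_3t) when cancellation demands more precision. A minimal C sketch of the first stage; the constants are the standard fdlibm values matching the .word pairs above, while the driver around them is illustrative only and assumes a small positive x:

#include <math.h>
#include <stdio.h>

static const double
    invpio2 = 6.36619772367581382433e-01, /* 2/pi,      0x3fe45f30 6dc9c883 */
    pio2_1  = 1.57079632673412561417e+00, /* pi/2 head, 0x3ff921fb 54400000 */
    pio2_1t = 6.07710050650619224932e-11; /* pi/2 tail, 0x3dd0b461 1a626331 */

int main(void) {
    double x = 2.0;                    /* small positive sample argument */
    int n = (int)(x * invpio2 + 0.5);  /* nearest multiple of pi/2 */
    double r = x - n * pio2_1;         /* nearly exact: pio2_1 has a short mantissa */
    double y = r - n * pio2_1t;        /* fold in the 33-bit tail */
    /* here n % 4 == 1, so sin(x) == cos(y) */
    printf("y = %.17g\ncos(y) = %.17g\nsin(x) = %.17g\n", y, cos(y), sin(x));
    return 0;
}

The ubfx/rsb/cmp sequences in the assembly extract the remainder's exponent and compare the bits lost to cancellation against the thresholds 16 and 49, deciding whether the second (.Lpio2_2) and third (.Lpio2_3) stages are needed, mirroring fdlibm's __ieee754_rem_pio2.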
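The reworked entry checks in __ieee754_pow above replace fdlibm's combined zero/NaN test with word-level branches so that common inputs fall through after a couple of integer compares; note that the new test returns one only for +0.0, while -0.0 now takes the general path. A standalone sketch of the same high/low-word classification; extract_words and the sample values are illustrative, and the memcpy-based extraction assumes ordinary IEEE-754 doubles:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void extract_words(double d, int32_t *hi, uint32_t *lo) {
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);   /* type-pun via memcpy, not a cast */
    *hi = (int32_t)(bits >> 32);      /* sign, exponent, top mantissa bits */
    *lo = (uint32_t)bits;             /* low mantissa bits */
}

int main(void) {
    double samples[] = { 0.0, 2.5, INFINITY, NAN };
    for (size_t i = 0; i < sizeof samples / sizeof samples[0]; i++) {
        int32_t hy; uint32_t ly;
        extract_words(samples[i], &hy, &ly);
        int32_t iy = hy & 0x7fffffff;            /* drop the sign bit */
        if (ly == 0 && hy == 0)
            printf("%g: y is +0, pow returns one\n", samples[i]);
        else if (iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0))
            printf("%g: y is NaN, pow returns x+y\n", samples[i]);
        else
            printf("%g: ordinary or infinite y\n", samples[i]);
    }
    return 0;
}

The pow_neon dispatch added further down works on the same words: 0x40100000 is the high word of 4.0, so the NEON path fires only for positive x and y no larger than roughly 4.0.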
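The KRAIT_NEON_OPTIMIZATION variants of __kernel_sin and __kernel_cos above re-associate the fully nested Horner chain around zz = z*z; the polynomial computed is algebraically identical, but the two halves no longer depend on each other and can overlap on a pipelined FPU. A sketch of the equivalence, with generic coefficients c[0..4] standing in for S2..S6:

#include <stdio.h>

/* degree-4 polynomial in z, fully nested: four serial multiply-adds */
static double horner(double z, const double c[5]) {
    return c[0] + z*(c[1] + z*(c[2] + z*(c[3] + z*c[4])));
}

/* same polynomial split around zz = z*z: (c[1]+z*c[2]) and
 * (c[3]+z*c[4]) have no data dependence and can issue back to back,
 * shortening the critical path */
static double split(double z, const double c[5]) {
    double zz = z*z;
    return c[0] + z*((c[1] + z*c[2]) + zz*(c[3] + z*c[4]));
}

int main(void) {
    const double c[5] = { -0.5, 1.0/24, -1.0/720, 1.0/40320, -1.0/3628800 };
    double z = 0.3;
    printf("horner = %.17g\nsplit  = %.17g\n", horner(z, c), split(z, c));
    return 0;
}

The two forms can differ in the last unit or so of precision because the rounding order changes, which is presumably why the rewrite is confined to the tuned #if branch rather than applied unconditionally.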
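The math_private.h hunk above re-declares the fdlibm kernels with __attribute__((pcs("aapcs-vfp"))) so that softfp-compiled callers pass doubles in VFP d-registers when the kernels themselves use the hard-float variant of the ABI, avoiding r-register shuffles on every call. A minimal sketch of the pattern; fast_scale is a hypothetical function, and the snippet only makes sense for a 32-bit ARM target built with -mfloat-abi=softfp:

/* declaration visible to softfp callers: the attribute switches this
 * callee to the VFP register convention */
double fast_scale(double x, double k) __attribute__((pcs("aapcs-vfp")));

double caller(double v) {
    return fast_scale(v, 2.0);  /* x and k travel in d0/d1, not r0-r3 */
}

This is also why the patch keeps a KRAIT_NO_AAPCS_VFP_MODE fallback: when the attribute cannot be used, pow_neon is declared with the default (softfp) convention instead.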
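The restored is_prelinked above recognizes a prelinked .so by a fixed-size trailer at the very end of the file: seek to -sizeof(prelink_info_t) from SEEK_END, read the record, and check the "PRE " tag. A standalone sketch of the same probe; the struct mirrors the patch, while read_prelink_base and the CLI driver are illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

typedef struct {
    long mmap_addr;        /* requested load address */
    char tag[4];           /* 'P', 'R', 'E', ' ' */
} prelink_info_t;

/* returns the requested base, or 0 if the file carries no trailer */
static unsigned long read_prelink_base(const char *path) {
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return 0;
    prelink_info_t info;
    unsigned long base = 0;
    if (lseek(fd, -(off_t)sizeof(info), SEEK_END) >= 0 &&
        read(fd, &info, sizeof(info)) == (ssize_t)sizeof(info) &&
        memcmp(info.tag, "PRE ", 4) == 0)
        base = (unsigned long)info.mmap_addr;
    close(fd);
    return base;
}

int main(int argc, char **argv) {
    for (int i = 1; i < argc; i++)
        printf("%s: base 0x%08lx\n", argv[i], read_prelink_base(argv[i]));
    return 0;
}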
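Finally, phdr_table_reserve_memory above pins the address-space reservation with MAP_FIXED whenever a prelink base was requested. A sketch of that reservation pattern with illustrative constants (the 0x10000000 base and 256 KiB size are not values from the patch):

#include <stdio.h>
#include <sys/mman.h>

int main(void) {
    size_t size = 0x40000;                      /* hypothetical load size */
    unsigned long required_base = 0x10000000UL; /* hypothetical prelink base */

    int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
    if (required_base != 0)
        mmap_flags |= MAP_FIXED;                /* must land exactly there */

    /* PROT_NONE: reserve address space only; the ELF segments are
     * mapped over the reservation afterwards */
    void *start = mmap((void *)required_base, size, PROT_NONE, mmap_flags, -1, 0);
    if (start == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    printf("reserved %zu bytes at %p\n", size, start);
    munmap(start, size);
    return 0;
}

MAP_FIXED silently replaces any mapping already occupying the requested range, so this scheme depends on the prelink address map being laid out to avoid collisions between libraries.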