summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libc/Android.mk81
-rw-r--r--libc/arch-arm/bionic/clock_gettime.c94
-rw-r--r--libc/arch-arm/bionic/clock_gettime_syscall.S42
-rw-r--r--libc/arch-arm/bionic/gettimeofday.c99
-rw-r--r--libc/arch-arm/bionic/gettimeofday_syscall.S42
-rw-r--r--libc/arch-arm/bionic/memcmp.S81
-rw-r--r--libc/arch-arm/bionic/memcpy.S396
-rw-r--r--libc/arch-arm/bionic/memmove.S526
-rw-r--r--libc/arch-arm/bionic/memset.S88
-rw-r--r--libc/arch-arm/bionic/strlen-armv7.S111
-rw-r--r--libc/arch-arm/include/machine/kernel_user_helper.h68
-rw-r--r--libc/bionic/md5.c2
-rw-r--r--libc/bionic/md5.h7
-rw-r--r--libc/bionic/sha1.c107
-rw-r--r--libc/bionic/system_properties.c11
-rw-r--r--libc/include/errno.h1
-rw-r--r--libc/include/netinet/in6.h34
-rw-r--r--libc/include/pthread.h1
-rw-r--r--libc/include/resolv.h2
-rw-r--r--libc/include/sha1.h8
-rw-r--r--libc/include/string.h33
-rw-r--r--libc/kernel/arch-arm/asm/unistd.h2
-rw-r--r--libc/kernel/common/linux/android_pmem.h17
-rw-r--r--libc/kernel/common/linux/ashmem.h2
-rw-r--r--libc/netbsd/gethnamaddr.c8
-rw-r--r--libc/netbsd/net/getaddrinfo.c17
-rw-r--r--libc/netbsd/net/getnameinfo.c15
-rw-r--r--libc/netbsd/resolv/res_send.c33
-rw-r--r--libc/private/bionic_atomic_arm.h13
-rw-r--r--libc/private/bionic_tls.h11
-rw-r--r--libc/private/logd.h1
-rw-r--r--libc/string/strchr.c15
-rw-r--r--libc/string/strrchr.c16
-rw-r--r--libc/tools/zoneinfo/ZoneCompactor.java7
-rwxr-xr-xlibc/tools/zoneinfo/generate5
-rw-r--r--libc/unistd/getopt_long.c12
-rw-r--r--libc/zoneinfo/zoneinfo.datbin499422 -> 500840 bytes
-rw-r--r--libc/zoneinfo/zoneinfo.idxbin29848 -> 29848 bytes
-rw-r--r--libc/zoneinfo/zoneinfo.version2
-rw-r--r--libm/Android.mk34
-rw-r--r--libm/arm/e_pow.S443
-rw-r--r--libm/arm/s_cos.S419
-rw-r--r--libm/arm/s_sin.S414
-rw-r--r--libm/src/e_pow.c46
-rw-r--r--libm/src/k_cos.c12
-rw-r--r--libm/src/k_sin.c11
-rw-r--r--libm/src/math_private.h8
-rw-r--r--linker/Android.mk3
-rw-r--r--linker/linker.cpp50
-rw-r--r--linker/linker_phdr.c8
-rw-r--r--linker/linker_phdr.h1
51 files changed, 3244 insertions, 215 deletions
diff --git a/libc/Android.mk b/libc/Android.mk
index 6a77deb..c04a0fc 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -279,7 +279,6 @@ libc_common_src_files := \
bionic/libc_init_common.c \
bionic/logd_write.c \
bionic/md5.c \
- bionic/memmove_words.c \
bionic/pututline.c \
bionic/realpath.c \
bionic/sched_getaffinity.c \
@@ -384,14 +383,46 @@ libc_common_src_files += \
arch-arm/bionic/memset.S \
arch-arm/bionic/setjmp.S \
arch-arm/bionic/sigsetjmp.S \
- arch-arm/bionic/strlen.c.arm \
arch-arm/bionic/strcpy.S \
arch-arm/bionic/strcmp.S \
arch-arm/bionic/syscall.S \
- string/memmove.c.arm \
- string/bcopy.c \
string/strncmp.c \
unistd/socketcalls.c
+ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
+libc_common_src_files += arch-arm/bionic/strlen-armv7.S
+else
+libc_common_src_files += arch-arm/bionic/strlen.c.arm
+endif
+
+# Check if we want a neonized version of memmove instead of the
+# current ARM version
+ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true)
+libc_common_src_files += \
+ arch-arm/bionic/memmove.S \
+ bionic/memmove_words.c
+else
+ifneq (, $(filter true,$(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION) $(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION)))
+ libc_common_src_files += \
+ arch-arm/bionic/memmove.S
+ else # Other ARM
+ libc_common_src_files += \
+ string/bcopy.c \
+ string/memmove.c.arm \
+ bionic/memmove_words.c
+ endif # !TARGET_USE_KRAIT_BIONIC_OPTIMIZATION
+endif # !TARGET_USE_SCORPION_BIONIC_OPTIMIZATION
+
+# If the kernel supports kernel user helpers for gettimeofday, use
+# that instead.
+ifeq ($(KERNEL_HAS_GETTIMEOFDAY_HELPER),true)
+ libc_common_src_files := $(filter-out arch-arm/syscalls/gettimeofday.S,$(libc_common_src_files))
+ libc_common_src_files := $(filter-out arch-arm/syscalls/clock_gettime.S,$(libc_common_src_files))
+ libc_common_src_files += \
+ arch-arm/bionic/gettimeofday.c \
+ arch-arm/bionic/gettimeofday_syscall.S \
+ arch-arm/bionic/clock_gettime.c \
+ arch-arm/bionic/clock_gettime_syscall.S
+endif # KERNEL_HAS_GETTIMEOFDAY_HELPER
# These files need to be arm so that gdbserver
# can set breakpoints in them without messing
@@ -436,6 +467,7 @@ libc_common_src_files += \
arch-x86/string/strcmp_wrapper.S \
arch-x86/string/strncmp_wrapper.S \
arch-x86/string/strlen_wrapper.S \
+ bionic/memmove_words.c \
string/strcpy.c \
bionic/pthread-atfork.c \
bionic/pthread-rwlocks.c \
@@ -476,6 +508,9 @@ libc_common_src_files += \
arch-mips/string/mips_strlen.c
libc_common_src_files += \
+ bionic/memmove_words.c
+
+libc_common_src_files += \
string/bcopy.c \
string/memcmp.c \
string/strcmp.c \
@@ -555,6 +590,44 @@ ifeq ($(TARGET_ARCH),arm)
ifeq ($(ARCH_ARM_USE_NON_NEON_MEMCPY),true)
libc_common_cflags += -DARCH_ARM_USE_NON_NEON_MEMCPY
endif
+
+ # Add in defines to activate SCORPION_NEON_OPTIMIZATION
+ ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true)
+ libc_common_cflags += -DSCORPION_NEON_OPTIMIZATION
+ ifeq ($(TARGET_USE_SCORPION_PLD_SET),true)
+ libc_common_cflags += -DPLDOFFS=$(TARGET_SCORPION_BIONIC_PLDOFFS)
+ libc_common_cflags += -DPLDSIZE=$(TARGET_SCORPION_BIONIC_PLDSIZE)
+ endif
+ endif
+ ifeq ($(TARGET_HAVE_TEGRA_ERRATA_657451),true)
+ libc_common_cflags += -DHAVE_TEGRA_ERRATA_657451
+ endif
+ # Add in defines to activate KRAIT_NEON_OPTIMIZATION
+ ifeq ($(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION),true)
+ libc_common_cflags += -DKRAIT_NEON_OPTIMIZATION
+ ifeq ($(TARGET_USE_KRAIT_PLD_SET),true)
+ libc_common_cflags += -DPLDOFFS=$(TARGET_KRAIT_BIONIC_PLDOFFS)
+ libc_common_cflags += -DPLDTHRESH=$(TARGET_KRAIT_BIONIC_PLDTHRESH)
+ libc_common_cflags += -DPLDSIZE=$(TARGET_KRAIT_BIONIC_PLDSIZE)
+ libc_common_cflags += -DBBTHRESH=$(TARGET_KRAIT_BIONIC_BBTHRESH)
+ endif
+ endif
+ ifeq ($(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION),true)
+ libc_common_cflags += -DSPARROW_NEON_OPTIMIZATION
+ endif
+ ifeq ($(TARGET_CORTEX_CACHE_LINE_32),true)
+ libc_common_cflags += -DCORTEX_CACHE_LINE_32
+ endif
+else # !arm
+ ifeq ($(TARGET_ARCH),x86)
+ libc_crt_target_cflags :=
+ ifeq ($(ARCH_X86_HAVE_SSE2),true)
+ libc_crt_target_cflags += -DUSE_SSE2=1
+ endif
+ ifeq ($(ARCH_X86_HAVE_SSSE3),true)
+ libc_crt_target_cflags += -DUSE_SSSE3=1
+ endif
+ endif # x86
endif # !arm
ifeq ($(TARGET_ARCH),x86)
diff --git a/libc/arch-arm/bionic/clock_gettime.c b/libc/arch-arm/bionic/clock_gettime.c
new file mode 100644
index 0000000..c2917b0
--- /dev/null
+++ b/libc/arch-arm/bionic/clock_gettime.c
@@ -0,0 +1,94 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include <time.h>
+#include <sys/time.h>
+#include <machine/cpu-features.h>
+#include <machine/kernel_user_helper.h>
+
+int clock_gettime(int clk_id, struct timespec *tp)
+{
+ unsigned prelock, postlock;
+
+ /*
+ * Check if the offset in the kernel user helper page has
+ * the flag set appropriately to show that this feature is
+ * enabled in the kernel. If not, default to the original
+ * clock_gettime system call.
+ *
+ * Also, if this is anything other than CLOCK_MONOTONIC, route
+ * to the original system call as well.
+ */
+ if ((__kuser_gtod_feature != __kuser_gtod_feature_flag) ||
+ (clk_id != CLOCK_MONOTONIC))
+ return clock_gettime_syscall(clk_id, tp);
+
+ if (tp) {
+ struct gtod_t dgtod;
+ uint32_t nscount, cycleoffset;
+ uint32_t mono_sec, mono_nsec;
+ uint64_t cycle_delta;
+
+ do {
+ prelock = __kuser_gtod_seqnum;
+
+ dgtod.cycle_last = __kuser_gtod_cycle_last;
+ dgtod.mask = __kuser_gtod_mask;
+ dgtod.mult = __kuser_gtod_mult;
+ dgtod.shift = __kuser_gtod_shift;
+ dgtod.tv_sec = __kuser_gtod_tv_sec;
+ dgtod.tv_nsec = __kuser_gtod_tv_nsec;
+
+ mono_sec = __kuser_gtod_wtm_tv_sec;
+ mono_nsec = __kuser_gtod_wtm_tv_nsec;
+
+ cycleoffset = __kuser_gtod_offset;
+ cycleoffset += __kuser_gtod_cycle_base;
+ nscount = *(uint32_t *)cycleoffset;
+
+ postlock = __kuser_gtod_seqnum;
+ } while (prelock != postlock);
+
+ cycle_delta = (nscount - dgtod.cycle_last) & dgtod.mask;
+ dgtod.tv_nsec += (cycle_delta * dgtod.mult) >> dgtod.shift;
+ dgtod.tv_sec += mono_sec;
+ dgtod.tv_nsec += mono_nsec;
+ while (dgtod.tv_nsec >= NSEC_PER_SEC) {
+ dgtod.tv_sec += 1;
+ dgtod.tv_nsec -= NSEC_PER_SEC;
+ }
+
+ tp->tv_sec = dgtod.tv_sec;
+ tp->tv_nsec = dgtod.tv_nsec;
+ }
+
+ return 0;
+}
diff --git a/libc/arch-arm/bionic/clock_gettime_syscall.S b/libc/arch-arm/bionic/clock_gettime_syscall.S
new file mode 100644
index 0000000..0b3078a
--- /dev/null
+++ b/libc/arch-arm/bionic/clock_gettime_syscall.S
@@ -0,0 +1,42 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+#include <sys/linux-syscalls.h>
+
+ENTRY(clock_gettime_syscall)
+ .save {r4, r7}
+ stmfd sp!, {r4, r7}
+ ldr r7, =__NR_clock_gettime
+ swi #0
+ movs r0, r0
+ ldmfd sp!, {r4, r7}
+ bmi __set_syscall_errno
+ bx lr
+END(clock_gettime_syscall)
diff --git a/libc/arch-arm/bionic/gettimeofday.c b/libc/arch-arm/bionic/gettimeofday.c
new file mode 100644
index 0000000..780d6e8
--- /dev/null
+++ b/libc/arch-arm/bionic/gettimeofday.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <machine/cpu-features.h>
+#include <machine/kernel_user_helper.h>
+
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ unsigned prelock, postlock;
+
+ /*
+ * Check if the offset in the kernel user helper page has
+ * the flag set appropriately to show that this feature is
+ * enabled in the kernel. If not, default to the original
+ * gettimeofday system call.
+ */
+ if (__kuser_gtod_feature != __kuser_gtod_feature_flag)
+ return gettimeofday_syscall(tv, tz);
+
+ if (tv) {
+ struct gtod_t dgtod;
+ uint32_t nscount, cycleoffset;
+ uint64_t cycle_delta;
+ uint32_t tmp = 0;
+
+ do {
+ prelock = __kuser_gtod_seqnum;
+
+ dgtod.cycle_last = __kuser_gtod_cycle_last;
+ dgtod.mask = __kuser_gtod_mask;
+ dgtod.mult = __kuser_gtod_mult;
+ dgtod.shift = __kuser_gtod_shift;
+ dgtod.tv_sec = __kuser_gtod_tv_sec;
+ dgtod.tv_nsec = __kuser_gtod_tv_nsec;
+
+ cycleoffset = __kuser_gtod_offset;
+ cycleoffset += __kuser_gtod_cycle_base;
+ nscount = *(uint32_t *)cycleoffset;
+
+ postlock = __kuser_gtod_seqnum;
+ } while (prelock != postlock);
+
+ cycle_delta = (nscount - dgtod.cycle_last) & dgtod.mask;
+ dgtod.tv_nsec += (cycle_delta * dgtod.mult) >> dgtod.shift;
+ while (dgtod.tv_nsec >= NSEC_PER_SEC) {
+ dgtod.tv_sec += 1;
+ dgtod.tv_nsec -= NSEC_PER_SEC;
+ }
+
+ tv->tv_sec = dgtod.tv_sec;
+ asm(" movw %[tmp], #0x4dd3\n\t"
+ " movt %[tmp], #0x1062\n\t"
+ " umull %[tmp], %[x], %[y], %[tmp]\n\t"
+ " lsr %[x], %[x], #6\n\t" :
+ [x] "=r" (tv->tv_usec) :
+ [y] "r" (dgtod.tv_nsec), [tmp] "r" (tmp)
+ : );
+ }
+
+ if (tz) {
+ do {
+ prelock = __kuser_gtod_seqnum;
+ tz->tz_minuteswest = __kuser_gtod_tz_minw;
+ tz->tz_dsttime = __kuser_gtod_tz_dst;
+ postlock = __kuser_gtod_seqnum;
+ } while (prelock != postlock);
+ }
+
+ return 0;
+}
diff --git a/libc/arch-arm/bionic/gettimeofday_syscall.S b/libc/arch-arm/bionic/gettimeofday_syscall.S
new file mode 100644
index 0000000..3a945e2
--- /dev/null
+++ b/libc/arch-arm/bionic/gettimeofday_syscall.S
@@ -0,0 +1,42 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+#include <sys/linux-syscalls.h>
+
+ENTRY(gettimeofday_syscall)
+ .save {r4, r7}
+ stmfd sp!, {r4, r7}
+ ldr r7, =__NR_gettimeofday
+ swi #0
+ movs r0, r0
+ ldmfd sp!, {r4, r7}
+ bmi __set_syscall_errno
+ bx lr
+END(gettimeofday_syscall)
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S
index c872a51..781c4f8 100644
--- a/libc/arch-arm/bionic/memcmp.S
+++ b/libc/arch-arm/bionic/memcmp.S
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2008, 2011 The Android Open Source Project
+ * Copyright (C) 2010 ST-Ericsson SA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,43 +31,71 @@
#include <machine/asm.h>
/*
- * Optimized memcmp() for ARM9.
- * This would not be optimal on XScale or ARM11, where more prefetching
- * and use of PLD will be needed.
- * The 2 major optimzations here are
- * (1) The main loop compares 16 bytes at a time
- * (2) The loads are scheduled in a way they won't stall
+ * Optimized memcmp() for ARM9 and Cortex-A9
*/
+#if __ARM_ARCH__ >= 7
+#define __ARM_CORTEX
+
+#if defined(CORTEX_CACHE_LINE_32)
+#define CACHE_LINE_SIZE 32
+#else
+#define CACHE_LINE_SIZE 64
+#endif
+
+#endif /* __ARM_ARCH__ */
+
ENTRY(memcmp)
+#if defined(__ARM_CORTEX)
+ pld [r0, #(CACHE_LINE_SIZE * 0)]
+ pld [r0, #(CACHE_LINE_SIZE * 1)]
+#else
PLD (r0, #0)
PLD (r1, #0)
+#endif
/* take of the case where length is 0 or the buffers are the same */
cmp r0, r1
+#if !defined(__ARM_CORTEX)
cmpne r2, #0
+#endif
moveq r0, #0
bxeq lr
+#if defined(__ARM_CORTEX)
+ pld [r1, #(CACHE_LINE_SIZE * 0)]
+ pld [r1, #(CACHE_LINE_SIZE * 1)]
+
+ /* make sure we have at least 8+4 bytes, this simplify things below
+ * and avoid some overhead for small blocks
+ */
+ cmp r2, #(8+4)
+ bmi 10f
+#endif /* __ARM_CORTEX */
+
.save {r4, lr}
/* save registers */
stmfd sp!, {r4, lr}
-
+
+#if !defined(__ARM_CORTEX)
PLD (r0, #32)
PLD (r1, #32)
+#endif
/* since r0 hold the result, move the first source
* pointer somewhere else
*/
mov r4, r0
-
+
+#if !defined(__ARM_CORTEX)
/* make sure we have at least 8+4 bytes, this simplify things below
* and avoid some overhead for small blocks
*/
cmp r2, #(8+4)
bmi 8f
-
+#endif
+
/* align first pointer to word boundary
* offset = -src & 3
*/
@@ -103,8 +132,14 @@ ENTRY(memcmp)
subs r2, r2, #(32 + 4)
bmi 1f
-0: PLD (r4, #64)
+0:
+#if defined(__ARM_CORTEX)
+ pld [r4, #(CACHE_LINE_SIZE * 2)]
+ pld [r1, #(CACHE_LINE_SIZE * 2)]
+#else
+ PLD (r4, #64)
PLD (r1, #64)
+#endif
ldr r0, [r4], #4
ldr lr, [r1, #4]!
eors r0, r0, ip
@@ -170,6 +205,22 @@ ENTRY(memcmp)
9: /* restore registers and return */
ldmfd sp!, {r4, lr}
bx lr
+
+#if defined(__ARM_CORTEX)
+10: /* process less than 12 bytes */
+ cmp r2, #0
+ moveq r0, #0
+ bxeq lr
+ mov r3, r0
+11:
+ ldrb r0, [r3], #1
+ ldrb ip, [r1], #1
+ subs r0, ip
+ bxne lr
+ subs r2, r2, #1
+ bne 11b
+ bx lr
+#endif /* __ARM_CORTEX */
END(memcmp)
@@ -192,8 +243,14 @@ END(memcmp)
bic r1, r1, #3
ldr lr, [r1], #4
-6: PLD (r1, #64)
+6:
+#if defined(__ARM_CORTEX)
+ pld [r1, #(CACHE_LINE_SIZE * 2)]
+ pld [r4, #(CACHE_LINE_SIZE * 2)]
+#else
+ PLD (r1, #64)
PLD (r4, #64)
+#endif
mov ip, lr, lsr #16
ldr lr, [r1], #4
ldr r0, [r4], #4
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 8453cc0..80f1bf5 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -2,6 +2,8 @@
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
+ * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -30,6 +32,396 @@
#include <machine/asm.h>
#if defined(__ARM_NEON__) && !defined(ARCH_ARM_USE_NON_NEON_MEMCPY)
+#if defined(KRAIT_NEON_OPTIMIZATION)
+ /*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_KRAIT_PLD_SET := true
+ * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize>
+ * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold>
+ * TARGET_KRAIT_BIONIC_BBTHRESH := <bbthreshold>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (10)
+#endif
+#ifndef PLDTHRESH
+#define PLDTHRESH (PLDOFFS)
+#endif
+#ifndef BBTHRESH
+#define BBTHRESH (4096/64)
+#endif
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (64)
+#endif
+#define NOP_OPCODE (0xe320f000)
+
+ .text
+ .fpu neon
+ .global memcpy
+ .type memcpy, %function
+ .align 5
+memcpy:
+ stmfd sp!, {r0, r9, r10, lr}
+ cmp r2, #4
+ blt .Lneon_lt4
+ cmp r2, #16
+ blt .Lneon_lt16
+ cmp r2, #32
+ blt .Lneon_16
+ cmp r2, #64
+ blt .Lneon_copy_32_a
+
+ mov r12, r2, lsr #6
+ cmp r12, #PLDTHRESH
+ ble .Lneon_copy_64_loop_nopld
+
+ cmp r12, #BBTHRESH
+ ble .Lneon_prime_pump
+
+ add lr, r0, #0x400
+ add r9, r1, #(PLDOFFS*PLDSIZE)
+ sub lr, lr, r9
+ lsl lr, lr, #21
+ lsr lr, lr, #21
+ add lr, lr, #(PLDOFFS*PLDSIZE)
+ cmp r12, lr, lsr #6
+ movle lr, #(PLDOFFS*PLDSIZE)
+
+ movgt r9, #(PLDOFFS)
+ rsbgts r9, r9, lr, lsr #6
+ ble .Lneon_prime_pump
+
+ add r10, r1, lr
+ bic r10, #0x3F
+
+ sub r12, lr, lsr #6
+ cmp r9, r12
+ suble r12, r12, r9
+ movgt r9, r12
+ movgt r12, #0
+
+ pld [r1, #((PLDOFFS-1)*PLDSIZE)]
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_copy_64_loop_outer_doublepld:
+ pld [r1, #((PLDOFFS)*PLDSIZE)]
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r9, r9, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .Lneon_copy_64_loop_outer_doublepld
+ cmp r12, #0
+ bne .Lneon_copy_64_loop_outer
+ mov r12, lr, lsr #6
+ b .Lneon_copy_64_loop_nopld
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_prime_pump:
+ mov lr, #(PLDOFFS*PLDSIZE)
+ add r10, r1, #(PLDOFFS*PLDSIZE)
+ bic r10, #0x3F
+ sub r12, r12, #PLDOFFS
+ pld [r10, #(-1*PLDSIZE)]
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_copy_64_loop_outer:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .Lneon_copy_64_loop_outer
+ mov r12, lr, lsr #6
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_copy_64_loop_nopld:
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ bne .Lneon_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ beq .Lneon_exit
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_copy_32_a:
+ movs r12, r2, lsl #27
+ bcc .Lneon_16
+ vld1.32 {q0,q1}, [r1]!
+ vst1.32 {q0,q1}, [r0]!
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_16:
+ bpl .Lneon_lt16
+ vld1.32 {q8}, [r1]!
+ vst1.32 {q8}, [r0]!
+ ands r2, r2, #0x0f
+ beq .Lneon_exit
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_lt16:
+ movs r12, r2, lsl #29
+ ldrcs r3, [r1], #4
+ ldrcs r12, [r1], #4
+ strcs r3, [r0], #4
+ strcs r12, [r0], #4
+ ldrmi r3, [r1], #4
+ strmi r3, [r0], #4
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_lt4:
+ movs r2, r2, lsl #31
+ ldrcsh r3, [r1], #2
+ strcsh r3, [r0], #2
+ ldrmib r12, [r1]
+ strmib r12, [r0]
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_exit:
+ ldmfd sp!, {r0, r9, r10, lr}
+ bx lr
+ .end
+#elif defined(SCORPION_NEON_OPTIMIZATION)
+ /*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_SCORPION_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_SCORPION_PLD_SET := true
+ * TARGET_SCORPION_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_SCORPION_BIONIC_PLDSIZE := <pldsize>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (6)
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (128) /* L2 cache line size */
+#endif
+ .code 32
+ .align 5
+ .globl memcpy
+ .func
+memcpy:
+ push {r0}
+ cmp r2, #4
+ blt .Lneon_lt4
+ cmp r2, #16
+ blt .Lneon_lt16
+ cmp r2, #32
+ blt .Lneon_16
+ cmp r2, #128
+ blt .Lneon_copy_32_a
+ /* Copy blocks of 128-bytes (word-aligned) at a time*/
+ /* Code below is optimized for PLDSIZE=128 only */
+ mov r12, r2, lsr #7
+ cmp r12, #PLDOFFS
+ ble .Lneon_copy_128_loop_nopld
+ sub r12, #PLDOFFS
+ pld [r1, #(PLDOFFS-1)*PLDSIZE]
+.Lneon_copy_128_loop_outer:
+ pld [r1, #(PLDOFFS*PLDSIZE)]
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ bne .Lneon_copy_128_loop_outer
+ mov r12, #PLDOFFS
+.Lneon_copy_128_loop_nopld:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ bne .Lneon_copy_128_loop_nopld
+ ands r2, r2, #0x7f
+ beq .Lneon_exit
+ cmp r2, #32
+ blt .Lneon_16
+ nop
+ /* Copy blocks of 32-bytes (word aligned) at a time*/
+.Lneon_copy_32_a:
+ mov r12, r2, lsr #5
+.Lneon_copy_32_loop_a:
+ vld1.32 {q0,q1}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]!
+ bne .Lneon_copy_32_loop_a
+ ands r2, r2, #0x1f
+ beq .Lneon_exit
+.Lneon_16:
+ subs r2, r2, #16
+ blt .Lneon_lt16
+ vld1.32 {q8}, [r1]!
+ vst1.32 {q8}, [r0]!
+ beq .Lneon_exit
+.Lneon_lt16:
+ movs r12, r2, lsl #29
+ bcc .Lneon_skip8
+ ldr r3, [r1], #4
+ ldr r12, [r1], #4
+ str r3, [r0], #4
+ str r12, [r0], #4
+.Lneon_skip8:
+ bpl .Lneon_lt4
+ ldr r3, [r1], #4
+ str r3, [r0], #4
+.Lneon_lt4:
+ movs r2, r2, lsl #31
+ bcc .Lneon_lt2
+ ldrh r3, [r1], #2
+ strh r3, [r0], #2
+.Lneon_lt2:
+ bpl .Lneon_exit
+ ldrb r12, [r1]
+ strb r12, [r0]
+.Lneon_exit:
+ pop {r0}
+ bx lr
+ .endfunc
+ .end
+#else
+#if defined(CORTEX_CACHE_LINE_32)
+ /*
+ *This can be enabled by setting flag
+ *TARGET_CORTEX_CACHE_LINE_32 in
+ *device/<vendor>/<board>/BoardConfig.mk
+ */
+ .text
+ .fpu neon
+
+ .global memcpy
+ .type memcpy, %function
+ .align 4
+
+/* a prefetch distance of 4 cache-lines works best experimentally */
+#define CACHE_LINE_SIZE 32
+memcpy:
+ .fnstart
+ .save {r0, lr}
+ stmfd sp!, {r0, lr}
+
+ /* start preloading as early as possible */
+ pld [r1, #(CACHE_LINE_SIZE*0)]
+ pld [r1, #(CACHE_LINE_SIZE*1)]
+
+ /* do we have at least 16-bytes to copy (needed for alignment below) */
+ cmp r2, #16
+ blo 5f
+
+ /* align destination to half cache-line for the write-buffer */
+ rsb r3, r0, #0
+ ands r3, r3, #0xF
+ beq 0f
+
+ /* copy up to 15-bytes (count in r3) */
+ sub r2, r2, r3
+ movs ip, r3, lsl #31
+ ldrmib lr, [r1], #1
+ strmib lr, [r0], #1
+ ldrcsb ip, [r1], #1
+ ldrcsb lr, [r1], #1
+ strcsb ip, [r0], #1
+ strcsb lr, [r0], #1
+ movs ip, r3, lsl #29
+ bge 1f
+ // copies 4 bytes, destination 32-bits aligned
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
+1: bcc 2f
+ // copies 8 bytes, destination 64-bits aligned
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0, :64]!
+2:
+
+0: /* preload immediately the next cache line, which we may need */
+ pld [r1, #(CACHE_LINE_SIZE*0)]
+ pld [r1, #(CACHE_LINE_SIZE*1)]
+
+ /* make sure we have at least 128 bytes to copy */
+ subs r2, r2, #128
+ blo 2f
+
+ /* preload all the cache lines we need.
+ * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
+ * ideally would would increase the distance in the main loop to
+ * avoid the goofy code below. In practice this doesn't seem to make
+ * a big difference.
+ */
+ pld [r1, #(CACHE_LINE_SIZE*2)]
+ pld [r1, #(CACHE_LINE_SIZE*3)]
+ pld [r1, #(CACHE_LINE_SIZE*4)]
+
+ .align 3
+1: /* The main loop copies 128 bytes at a time */
+ subs r2, r2, #128
+ vld1.8 {d0 - d3}, [r1]!
+ vld1.8 {d4 - d7}, [r1]!
+ pld [r1, #(CACHE_LINE_SIZE*1)]
+ pld [r1, #(CACHE_LINE_SIZE*2)]
+ vld1.8 {d16 - d19}, [r1]!
+ vld1.8 {d20 - d23}, [r1]!
+ pld [r1, #(CACHE_LINE_SIZE*1)]
+ pld [r1, #(CACHE_LINE_SIZE*2)]
+ vst1.8 {d0 - d3}, [r0, :128]!
+ vst1.8 {d4 - d7}, [r0, :128]!
+ vst1.8 {d16 - d19}, [r0, :128]!
+ vst1.8 {d20 - d23}, [r0, :128]!
+ bhs 1b
+
+2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
+ add r2, r2, #128
+ subs r2, r2, #32
+ blo 4f
+
+3: /* 32 bytes at a time. These cache lines were already preloaded */
+ vld1.8 {d0 - d3}, [r1]!
+ subs r2, r2, #32
+ vst1.8 {d0 - d3}, [r0, :128]!
+ bhs 3b
+
+4: /* less than 32 left */
+ add r2, r2, #32
+ tst r2, #0x10
+ beq 5f
+ // copies 16 bytes, 128-bits aligned
+ vld1.8 {d0, d1}, [r1]!
+ vst1.8 {d0, d1}, [r0, :128]!
+
+5: /* copy up to 15-bytes (count in r2) */
+ movs ip, r2, lsl #29
+ bcc 1f
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0]!
+1: bge 2f
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
+2: movs ip, r2, lsl #31
+ ldrmib r3, [r1], #1
+ ldrcsb ip, [r1], #1
+ ldrcsb lr, [r1], #1
+ strmib r3, [r0], #1
+ strcsb ip, [r0], #1
+ strcsb lr, [r0], #1
+
+ ldmfd sp!, {r0, lr}
+ bx lr
+ .fnend
+#else /*!CORTEX_CACHE_LINE_32*/
.text
.fpu neon
@@ -165,8 +557,8 @@ ENTRY(memcpy)
ldmfd sp!, {r0, lr}
bx lr
END(memcpy)
-
-
+#endif /* CORTEX_CACHE_LINE_32 */
+#endif /* !SCORPION_NEON_OPTIMIZATION */
#else /* __ARM_ARCH__ < 7 */
diff --git a/libc/arch-arm/bionic/memmove.S b/libc/arch-arm/bionic/memmove.S
new file mode 100644
index 0000000..937d14b
--- /dev/null
+++ b/libc/arch-arm/bionic/memmove.S
@@ -0,0 +1,526 @@
+/***************************************************************************
+ Copyright (c) 2009-2012 Code Aurora Forum. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Code Aurora nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/***************************************************************************
+ * Neon memmove: Attempts to do a memmove with Neon registers if possible,
+ * Inputs:
+ * dest: The destination buffer
+ * src: The source buffer
+ * n: The size of the buffer to transfer
+ * Outputs:
+ *
+ ***************************************************************************/
+
+#include <machine/cpu-features.h>
+
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION)
+ /*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_KRAIT_PLD_SET := true
+ * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize>
+ * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (10)
+#endif
+#ifndef PLDTHRESH
+#define PLDTHRESH (PLDOFFS)
+#endif
+#if (PLDOFFS < 5)
+#error Routine does not support offsets less than 5
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (64)
+#endif
+#define NOP_OPCODE (0xe320f000)
+
+ .code 32
+ .align 5
+ .global memmove
+ .type memmove, %function
+
+ .global _memmove_words
+ .type _memmove_words, %function
+
+ .global bcopy
+ .type bcopy, %function
+
+bcopy:
+ mov r12, r0
+ mov r0, r1
+ mov r1, r12
+ .balignl 64, NOP_OPCODE, 4*2
+memmove:
+_memmove_words:
+.Lneon_memmove_cmf:
+ subs r12, r0, r1
+ bxeq lr
+ cmphi r2, r12
+ bls memcpy /* Use memcpy for non-overlapping areas */
+
+ push {r0}
+
+.Lneon_back_to_front_copy:
+ add r0, r0, r2
+ add r1, r1, r2
+ cmp r2, #4
+ bgt .Lneon_b2f_gt4
+ cmp r2, #0
+.Lneon_b2f_smallcopy_loop:
+ beq .Lneon_memmove_done
+ ldrb r12, [r1, #-1]!
+ subs r2, r2, #1
+ strb r12, [r0, #-1]!
+ b .Lneon_b2f_smallcopy_loop
+.Lneon_b2f_gt4:
+ sub r3, r0, r1
+ cmp r2, r3
+ movle r12, r2
+ movgt r12, r3
+ cmp r12, #64
+ bge .Lneon_b2f_copy_64
+ cmp r12, #32
+ bge .Lneon_b2f_copy_32
+ cmp r12, #8
+ bge .Lneon_b2f_copy_8
+ cmp r12, #4
+ bge .Lneon_b2f_copy_4
+ b .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
+ sub r1, r1, #64 /* Predecrement */
+ sub r0, r0, #64
+ movs r12, r2, lsr #6
+ cmp r12, #PLDTHRESH
+ ble .Lneon_b2f_copy_64_loop_nopld
+ sub r12, #PLDOFFS
+ pld [r1, #-(PLDOFFS-5)*PLDSIZE]
+ pld [r1, #-(PLDOFFS-4)*PLDSIZE]
+ pld [r1, #-(PLDOFFS-3)*PLDSIZE]
+ pld [r1, #-(PLDOFFS-2)*PLDSIZE]
+ pld [r1, #-(PLDOFFS-1)*PLDSIZE]
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_b2f_copy_64_loop_outer:
+ pld [r1, #-(PLDOFFS)*PLDSIZE]
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ sub r1, r1, #96 /* Post-fixup and predecrement */
+ vst1.32 {q2, q3}, [r0]
+ sub r0, r0, #96
+ bne .Lneon_b2f_copy_64_loop_outer
+ mov r12, #PLDOFFS
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_b2f_copy_64_loop_nopld:
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ sub r1, r1, #96 /* Post-fixup and predecrement */
+ vst1.32 {q10, q11}, [r0]
+ sub r0, r0, #96
+ bne .Lneon_b2f_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ beq .Lneon_memmove_done
+ add r1, r1, #64 /* Post-fixup */
+ add r0, r0, #64
+ cmp r2, #32
+ blt .Lneon_b2f_copy_finish
+.Lneon_b2f_copy_32:
+ mov r12, r2, lsr #5
+.Lneon_b2f_copy_32_loop:
+ sub r1, r1, #32 /* Predecrement */
+ sub r0, r0, #32
+ vld1.32 {q0,q1}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]
+ bne .Lneon_b2f_copy_32_loop
+ ands r2, r2, #0x1f
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_finish:
+.Lneon_b2f_copy_8:
+ movs r12, r2, lsr #0x3
+ beq .Lneon_b2f_copy_4
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_b2f_copy_8_loop:
+ sub r1, r1, #8 /* Predecrement */
+ sub r0, r0, #8
+ vld1.32 {d0}, [r1]
+ subs r12, r12, #1
+ vst1.32 {d0}, [r0]
+ bne .Lneon_b2f_copy_8_loop
+ ands r2, r2, #0x7
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_4:
+ movs r12, r2, lsr #0x2
+ beq .Lneon_b2f_copy_1
+.Lneon_b2f_copy_4_loop:
+ ldr r3, [r1, #-4]!
+ subs r12, r12, #1
+ str r3, [r0, #-4]!
+ bne .Lneon_b2f_copy_4_loop
+ ands r2, r2, #0x3
+.Lneon_b2f_copy_1:
+ cmp r2, #0
+ beq .Lneon_memmove_done
+ .balignl 64, NOP_OPCODE, 4*2
+.Lneon_b2f_copy_1_loop:
+ ldrb r12, [r1, #-1]!
+ subs r2, r2, #1
+ strb r12, [r0, #-1]!
+ bne .Lneon_b2f_copy_1_loop
+
+.Lneon_memmove_done:
+ pop {r0}
+ bx lr
+
+ .end
+
+#elif defined(SCORPION_NEON_OPTIMIZATION)
+ /*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_SCORPION_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_SCORPION_PLD_SET := true
+ * TARGET_SCORPION_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_SCORPION_BIONIC_PLDSIZE := <pldsize>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (6)
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (128) /* L2 cache line size */
+#endif
+
+ .code 32
+ .align 5
+ .global memmove
+ .type memmove, %function
+
+ .global bcopy
+ .type bcopy, %function
+
+bcopy:
+ mov r12, r0
+ mov r0, r1
+ mov r1, r12
+memmove:
+ push {r0}
+
+ /*
+ * The requirements for memmove state that the function should
+ * operate as if data were being copied from the source to a
+ * buffer, then to the destination. This is to allow a user
+ * to copy data from a source and target that overlap.
+ *
+ * We can't just do byte copies front-to-back automatically, since
+ * there's a good chance we may have an overlap (why else would someone
+ * intentionally use memmove then?).
+ *
+ * We'll break this into two parts. Front-to-back, or back-to-front
+ * copies.
+ */
+.Lneon_memmove_cmf:
+ cmp r0, r1
+ blt .Lneon_front_to_back_copy
+ bgt .Lneon_back_to_front_copy
+ b .Lneon_memmove_done
+
+ /* #############################################################
+ * Front to Back copy
+ */
+.Lneon_front_to_back_copy:
+ /*
+ * For small copies, just do a quick memcpy. We can do this for
+ * front-to-back copies, aligned or unaligned, since we're only
+ * doing 1 byte at a time...
+ */
+ cmp r2, #4
+ bgt .Lneon_f2b_gt4
+ cmp r2, #0
+.Lneon_f2b_smallcopy_loop:
+ beq .Lneon_memmove_done
+ ldrb r12, [r1], #1
+ subs r2, r2, #1
+ strb r12, [r0], #1
+ b .Lneon_f2b_smallcopy_loop
+.Lneon_f2b_gt4:
+ /* The window size is in r3. */
+ sub r3, r1, r0
+ /* #############################################################
+ * Front to Back copy
+ */
+ /*
+ * Note that we can't just route based on the size in r2. If that's
+ * larger than the overlap window in r3, we could potentially
+ * (and likely!) destroy data we're copying.
+ */
+ cmp r2, r3
+ movle r12, r2
+ movgt r12, r3
+ cmp r12, #256
+ bge .Lneon_f2b_copy_128
+ cmp r12, #64
+ bge .Lneon_f2b_copy_32
+ cmp r12, #16
+ bge .Lneon_f2b_copy_16
+ cmp r12, #8
+ bge .Lneon_f2b_copy_8
+ cmp r12, #4
+ bge .Lneon_f2b_copy_4
+ b .Lneon_f2b_copy_1
+ nop
+.Lneon_f2b_copy_128:
+ mov r12, r2, lsr #7
+ cmp r12, #PLDOFFS
+ ble .Lneon_f2b_copy_128_loop_nopld
+ sub r12, #PLDOFFS
+ pld [r1, #(PLDOFFS-1)*PLDSIZE]
+.Lneon_f2b_copy_128_loop_outer:
+ pld [r1, #(PLDOFFS*PLDSIZE)]
+ vld1.32 {q0,q1}, [r1]!
+ vld1.32 {q2,q3}, [r1]!
+ vld1.32 {q8,q9}, [r1]!
+ vld1.32 {q10,q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]!
+ vst1.32 {q2,q3}, [r0]!
+ vst1.32 {q8,q9}, [r0]!
+ vst1.32 {q10,q11}, [r0]!
+ bne .Lneon_f2b_copy_128_loop_outer
+ mov r12, #PLDOFFS
+.Lneon_f2b_copy_128_loop_nopld:
+ vld1.32 {q0,q1}, [r1]!
+ vld1.32 {q2,q3}, [r1]!
+ vld1.32 {q8,q9}, [r1]!
+ vld1.32 {q10,q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]!
+ vst1.32 {q2,q3}, [r0]!
+ vst1.32 {q8,q9}, [r0]!
+ vst1.32 {q10,q11}, [r0]!
+ bne .Lneon_f2b_copy_128_loop_nopld
+ ands r2, r2, #0x7f
+ beq .Lneon_memmove_done
+ cmp r2, #32
+ bge .Lneon_f2b_copy_32
+ b .Lneon_f2b_copy_finish
+.Lneon_f2b_copy_32:
+ mov r12, r2, lsr #5
+.Lneon_f2b_copy_32_loop:
+ vld1.32 {q0,q1}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]!
+ bne .Lneon_f2b_copy_32_loop
+ ands r2, r2, #0x1f
+ beq .Lneon_memmove_done
+.Lneon_f2b_copy_finish:
+.Lneon_f2b_copy_16:
+ movs r12, r2, lsr #4
+ beq .Lneon_f2b_copy_8
+.Lneon_f2b_copy_16_loop:
+ vld1.32 {q0}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0}, [r0]!
+ bne .Lneon_f2b_copy_16_loop
+ ands r2, r2, #0xf
+ beq .Lneon_memmove_done
+.Lneon_f2b_copy_8:
+ movs r12, r2, lsr #3
+ beq .Lneon_f2b_copy_4
+.Lneon_f2b_copy_8_loop:
+ vld1.32 {d0}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {d0}, [r0]!
+ bne .Lneon_f2b_copy_8_loop
+ ands r2, r2, #0x7
+ beq .Lneon_memmove_done
+.Lneon_f2b_copy_4:
+ movs r12, r2, lsr #2
+ beq .Lneon_f2b_copy_1
+.Lneon_f2b_copy_4_loop:
+ ldr r3, [r1], #4
+ subs r12, r12, #1
+ str r3, [r0], #4
+ bne .Lneon_f2b_copy_4_loop
+ ands r2, r2, #0x3
+ nop
+.Lneon_f2b_copy_1:
+ cmp r2, #0
+ beq .Lneon_memmove_done
+.Lneon_f2b_copy_1_loop:
+ ldrb r12, [r1], #1
+ subs r2, r2, #1
+ strb r12, [r0], #1
+ bne .Lneon_f2b_copy_1_loop
+.Lneon_f2b_finish:
+ b .Lneon_memmove_done
+
+ /* #############################################################
+ * Back to Front copy
+ */
+.Lneon_back_to_front_copy:
+ /*
+ * Here, we'll want to shift to the end of the buffers. This
+ * actually points us one past where we need to go, but since
+ * we'll pre-decrement throughout, this will be fine.
+ */
+ add r0, r0, r2
+ add r1, r1, r2
+ cmp r2, #4
+ bgt .Lneon_b2f_gt4
+ cmp r2, #0
+.Lneon_b2f_smallcopy_loop:
+ beq .Lneon_memmove_done
+ ldrb r12, [r1, #-1]!
+ subs r2, r2, #1
+ strb r12, [r0, #-1]!
+ b .Lneon_b2f_smallcopy_loop
+.Lneon_b2f_gt4:
+ /*
+ * The minimum of the overlap window size and the copy size
+ * is in r3.
+ */
+ sub r3, r0, r1
+ /*
+ * #############################################################
+ * Back to Front copy -
+ */
+ cmp r2, r3
+ movle r12, r2
+ movgt r12, r3
+ cmp r12, #256
+ bge .Lneon_b2f_copy_128
+ cmp r12, #64
+ bge .Lneon_b2f_copy_32
+ cmp r12, #8
+ bge .Lneon_b2f_copy_8
+ cmp r12, #4
+ bge .Lneon_b2f_copy_4
+ b .Lneon_b2f_copy_1
+ nop
+.Lneon_b2f_copy_128:
+ movs r12, r2, lsr #7
+ cmp r12, #PLDOFFS
+ ble .Lneon_b2f_copy_128_loop_nopld
+ sub r12, #PLDOFFS
+ pld [r1, #-(PLDOFFS-1)*PLDSIZE]
+.Lneon_b2f_copy_128_loop_outer:
+ pld [r1, #-(PLDOFFS*PLDSIZE)]
+ sub r1, r1, #128
+ sub r0, r0, #128
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ sub r1, r1, #128
+ sub r0, r0, #128
+ bne .Lneon_b2f_copy_128_loop_outer
+ mov r12, #PLDOFFS
+.Lneon_b2f_copy_128_loop_nopld:
+ sub r1, r1, #128
+ sub r0, r0, #128
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ sub r1, r1, #128
+ sub r0, r0, #128
+ bne .Lneon_b2f_copy_128_loop_nopld
+ ands r2, r2, #0x7f
+ beq .Lneon_memmove_done
+ cmp r2, #32
+ bge .Lneon_b2f_copy_32
+ b .Lneon_b2f_copy_finish
+.Lneon_b2f_copy_32:
+ mov r12, r2, lsr #5
+.Lneon_b2f_copy_32_loop:
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.32 {q0,q1}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q0,q1}, [r0]
+ bne .Lneon_b2f_copy_32_loop
+ ands r2, r2, #0x1f
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_finish:
+.Lneon_b2f_copy_8:
+ movs r12, r2, lsr #0x3
+ beq .Lneon_b2f_copy_4
+.Lneon_b2f_copy_8_loop:
+ sub r1, r1, #8
+ sub r0, r0, #8
+ vld1.32 {d0}, [r1]
+ subs r12, r12, #1
+ vst1.32 {d0}, [r0]
+ bne .Lneon_b2f_copy_8_loop
+ ands r2, r2, #0x7
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_4:
+ movs r12, r2, lsr #0x2
+ beq .Lneon_b2f_copy_1
+.Lneon_b2f_copy_4_loop:
+ ldr r3, [r1, #-4]!
+ subs r12, r12, #1
+ str r3, [r0, #-4]!
+ bne .Lneon_b2f_copy_4_loop
+ ands r2, r2, #0x3
+ nop
+.Lneon_b2f_copy_1:
+ cmp r2, #0
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_1_loop:
+ ldrb r12, [r1, #-1]!
+ subs r2, r2, #1
+ strb r12, [r0, #-1]!
+ bne .Lneon_b2f_copy_1_loop
+
+.Lneon_memmove_done:
+ pop {r0}
+ bx lr
+
+ .end
+#endif /* SCORPION_NEON_OPTIMIZATION */
+
diff --git a/libc/arch-arm/bionic/memset.S b/libc/arch-arm/bionic/memset.S
index 273b9e3..c386e7e 100644
--- a/libc/arch-arm/bionic/memset.S
+++ b/libc/arch-arm/bionic/memset.S
@@ -2,6 +2,8 @@
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
+ * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -27,6 +29,90 @@
*/
#include <machine/asm.h>
+
+#if( defined(SCORPION_NEON_OPTIMIZATION) || defined(CORTEX_CACHE_LINE_32))
+ .code 32
+ .align 8
+ .global memset
+ .type memset, %function
+
+ .global bzero
+ .type bzero, %function
+
+bzero:
+ mov r2, r1
+ mov r1, #0
+memset:
+ push {r0}
+
+ cmp r2, #6
+ bgt .Lmemset_gt6
+ cmp r2, #0
+ beq .Lmemset_smallcopy_done
+.Lmemset_smallcopy_loop:
+ strb r1, [r0], #1
+ subs r2, r2, #1
+ bne .Lmemset_smallcopy_loop
+.Lmemset_smallcopy_done:
+ pop {r0}
+ bx lr
+
+.Lmemset_gt6:
+ vdup.8 q0, r1
+ vmov r1, s0
+
+ /*
+ * Decide where to route for the maximum copy sizes.
+ */
+ cmp r2, #4
+ blt .Lmemset_lt4
+ cmp r2, #16
+ blt .Lmemset_lt16
+ vmov q1, q0
+ cmp r2, #128
+ blt .Lmemset_32
+.Lmemset_128:
+ mov r12, r2, lsr #7
+.Lmemset_128_loop:
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q0, q1}, [r0]!
+ subs r12, r12, #1
+ bne .Lmemset_128_loop
+ ands r2, r2, #0x7f
+ beq .Lmemset_end
+.Lmemset_32:
+ movs r12, r2, lsr #5
+ beq .Lmemset_lt32
+.Lmemset_32_loop:
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ bne .Lmemset_32_loop
+ ands r2, r2, #0x1f
+ beq .Lmemset_end
+.Lmemset_lt32:
+ cmp r2, #16
+ blt .Lmemset_lt16
+ vst1.64 {q0}, [r0]!
+ subs r2, r2, #16
+ beq .Lmemset_end
+.Lmemset_lt16:
+ movs r12, r2, lsl #29
+ strcs r1, [r0], #4
+ strcs r1, [r0], #4
+ strmi r1, [r0], #4
+.Lmemset_lt4:
+ movs r2, r2, lsl #31
+ strcsh r1, [r0], #2
+ strmib r1, [r0]
+.Lmemset_end:
+ pop {r0}
+ bx lr
+
+ .end
+#else /* !(SCORPION_NEON_OPTIMIZATION || CORTEX_CACHE_LINE_32) */
+
/*
* Optimized memset() for ARM.
@@ -107,3 +193,5 @@ ENTRY(memset)
ldmfd sp!, {r0, r4-r7, lr}
bx lr
END(memset)
+
+#endif /* SCORPION_NEON_OPTIMIZATION || CORTEX_CACHE_LINE_32 */
diff --git a/libc/arch-arm/bionic/strlen-armv7.S b/libc/arch-arm/bionic/strlen-armv7.S
new file mode 100644
index 0000000..125e92f
--- /dev/null
+++ b/libc/arch-arm/bionic/strlen-armv7.S
@@ -0,0 +1,111 @@
+/* Copyright (c) 2010-2011, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Linaro Limited nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Written by Dave Gilbert <david.gilbert@linaro.org>
+
+ This strlen routine is optimised on a Cortex-A9 and should work on
+ all ARMv7 processors. This routine is reasonably fast for short
+ strings, but is probably slower than a simple implementation if all
+ your strings are very short */
+
+@ 2011-02-08 david.gilbert@linaro.org
+@ Extracted from local git 6848613a
+
+
+@ this lets us check a flag in a 00/ff byte easily in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) 1<<(31-(c*8))
+#else
+#define CHARTSTMASK(c) 1<<(c*8)
+#endif
+
+@-----------------------------------------------------------------------------------------------------------------------------
+ .syntax unified
+ .arch armv7-a
+
+ .thumb_func
+ .align 2
+ .p2align 4,,15
+ .global strlen
+ .type strlen,%function
+strlen:
+ @ r0 = string
+ @ returns count of bytes in string not including terminator
+ mov r1, r0
+ push { r4,r6 }
+ mvns r6, #0 @ all F
+ movs r4, #0
+ tst r0, #7
+ beq 2f
+
+1:
+ ldrb r2, [r1], #1
+ tst r1, #7 @ Hit alignment yet?
+ cbz r2, 10f @ Exit if we found the 0
+ bne 1b
+
+ @ So we're now aligned
+2:
+ ldmia r1!,{r2,r3}
+ uadd8 r2, r2, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r2, r4, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ uadd8 r3, r3, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r3, r2, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ cmp r3, #0
+ beq 2b
+
+strlenendtmp:
+ @ One (or more) of the bytes we loaded was 0 - but which one?
+ @ r2 has the mask corresponding to the first loaded word
+ @ r3 has a combined mask of the two words - but if r2 was all-non 0
+ @ then it's just the 2nd words
+ cmp r2, #0
+ itte eq
+ moveq r2, r3 @ the end is in the 2nd word
+ subeq r1,r1,#3
+ subne r1,r1,#7
+
+ @ r1 currently points to the 2nd byte of the word containing the 0
+ tst r2, # CHARTSTMASK(0) @ 1st character
+ bne 10f
+ adds r1,r1,#1
+ tst r2, # CHARTSTMASK(1) @ 2nd character
+ ittt eq
+ addeq r1,r1,#1
+ tsteq r2, # (3<<15) @ 2nd & 3rd character
+ @ If not the 3rd must be the last one
+ addeq r1,r1,#1
+
+10:
+ @ r0 is still at the beginning, r1 is pointing 1 byte after the terminator
+ sub r0, r1, r0
+ subs r0, r0, #1
+ pop { r4, r6 }
+ bx lr
diff --git a/libc/arch-arm/include/machine/kernel_user_helper.h b/libc/arch-arm/include/machine/kernel_user_helper.h
new file mode 100644
index 0000000..8836c50
--- /dev/null
+++ b/libc/arch-arm/include/machine/kernel_user_helper.h
@@ -0,0 +1,68 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _ARCH_ARM_KERNEL_USER_HELPER_H
+#define _ARCH_ARM_KERNEL_USER_HELPER_H
+
+extern int clock_gettime_syscall(int clk_id, struct timespec *tp);
+extern int gettimeofday_syscall(struct timeval *tv, struct timezone *tz);
+
+#define __kuser_gtod_base (*(int32_t *)0xffff0f40)
+#define __kuser_gtod_cycle_last (*(int32_t *)0xffff0f40)
+#define __kuser_gtod_mask (*(int32_t *)0xffff0f48)
+#define __kuser_gtod_mult (*(int32_t *)0xffff0f50)
+#define __kuser_gtod_shift (*(int32_t *)0xffff0f54)
+#define __kuser_gtod_tv_sec (*(int32_t *)0xffff0f58)
+#define __kuser_gtod_tv_nsec (*(int32_t *)0xffff0f5c)
+
+#define __kuser_gtod_seqnum (*(int32_t *)0xffff0f28)
+#define __kuser_gtod_offset (*(int32_t *)0xffff0f30)
+#define __kuser_gtod_cycle_base 0xfffef000
+#define __kuser_gtod_feature (*(int32_t *)0xffff0f34)
+#define __kuser_gtod_feature_flag 0xffff0f20
+
+#define __kuser_gtod_wtm_tv_sec (*(int32_t *)0xffff0f38)
+#define __kuser_gtod_wtm_tv_nsec (*(int32_t *)0xffff0f3c)
+
+#define __kuser_gtod_timezone (*(int32_t *)0xffff0f20)
+#define __kuser_gtod_tz_minw (*(int32_t *)0xffff0f20)
+#define __kuser_gtod_tz_dst (*(int32_t *)0xffff0f24)
+
+struct gtod_t {
+ uint64_t cycle_last;
+ uint64_t mask;
+ uint32_t mult;
+ uint32_t shift;
+ uint32_t tv_sec;
+ uint32_t tv_nsec;
+};
+
+#define NSEC_PER_SEC 1000000000L
+
+#endif
diff --git a/libc/bionic/md5.c b/libc/bionic/md5.c
index ba4aaed..02785bd 100644
--- a/libc/bionic/md5.c
+++ b/libc/bionic/md5.c
@@ -231,7 +231,7 @@ MD5_Update (struct md5 *m, const void *v, size_t len)
}
calc(m, current);
#else
- calc(m, (u_int32_t*)m->save);
+ calc(m, m->save32);
#endif
offset = 0;
}
diff --git a/libc/bionic/md5.h b/libc/bionic/md5.h
index a381994..079ed84 100644
--- a/libc/bionic/md5.h
+++ b/libc/bionic/md5.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1995 - 2001 Kungliga Tekniska Högskolan
+ * Copyright (c) 1995 - 2001 Kungliga Tekniska Högskolan
* (Royal Institute of Technology, Stockholm, Sweden).
* All rights reserved.
*
@@ -40,7 +40,10 @@
struct md5 {
unsigned int sz[2];
u_int32_t counter[4];
- unsigned char save[64];
+ union {
+ unsigned char save[64];
+ u_int32_t save32[16];
+ };
};
typedef struct md5 MD5_CTX;
diff --git a/libc/bionic/sha1.c b/libc/bionic/sha1.c
index efa95a5..7384812 100644
--- a/libc/bionic/sha1.c
+++ b/libc/bionic/sha1.c
@@ -23,10 +23,6 @@
#include <sha1.h>
#include <string.h>
-#if HAVE_NBTOOL_CONFIG_H
-#include "nbtool_config.h"
-#endif
-
#if !HAVE_SHA1_H
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
@@ -54,77 +50,16 @@
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
typedef union {
- u_char c[64];
- u_int l[16];
+ uint8_t c[64];
+ uint32_t l[16];
} CHAR64LONG16;
-/* old sparc64 gcc could not compile this */
-#undef SPARC64_GCC_WORKAROUND
-#if defined(__sparc64__) && defined(__GNUC__) && __GNUC__ < 3
-#define SPARC64_GCC_WORKAROUND
-#endif
-
-#ifdef SPARC64_GCC_WORKAROUND
-void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
-void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
-void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
-void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
-
-#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i)
-#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i)
-#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i)
-#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i)
-#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i)
-
-void
-do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
-{
- nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2); nR0(c,d,e,a,b, 3);
- nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5); nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7);
- nR0(c,d,e,a,b, 8); nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11);
- nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14); nR0(a,b,c,d,e,15);
- nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17); nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19);
-}
-
-void
-do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
-{
- nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22); nR2(c,d,e,a,b,23);
- nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25); nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27);
- nR2(c,d,e,a,b,28); nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31);
- nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34); nR2(a,b,c,d,e,35);
- nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37); nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39);
-}
-
-void
-do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
-{
- nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42); nR3(c,d,e,a,b,43);
- nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45); nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47);
- nR3(c,d,e,a,b,48); nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51);
- nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54); nR3(a,b,c,d,e,55);
- nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57); nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59);
-}
-
-void
-do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
-{
- nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62); nR4(c,d,e,a,b,63);
- nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65); nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67);
- nR4(c,d,e,a,b,68); nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71);
- nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74); nR4(a,b,c,d,e,75);
- nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77); nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79);
-}
-#endif
-
/*
* Hash a single 512-bit block. This is the core of the algorithm.
*/
-void SHA1Transform(state, buffer)
- u_int32_t state[5];
- const u_char buffer[64];
+void SHA1Transform(uint32_t state[5], const uint8_t buffer[64])
{
- u_int32_t a, b, c, d, e;
+ uint32_t a, b, c, d, e;
CHAR64LONG16 *block;
#ifdef SHA1HANDSOFF
@@ -148,12 +83,6 @@ void SHA1Transform(state, buffer)
d = state[3];
e = state[4];
-#ifdef SPARC64_GCC_WORKAROUND
- do_R01(&a, &b, &c, &d, &e, block);
- do_R2(&a, &b, &c, &d, &e, block);
- do_R3(&a, &b, &c, &d, &e, block);
- do_R4(&a, &b, &c, &d, &e, block);
-#else
/* 4 rounds of 20 operations each. Loop unrolled. */
R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
@@ -175,7 +104,6 @@ void SHA1Transform(state, buffer)
R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
-#endif
/* Add the working vars back into context.state[] */
state[0] += a;
@@ -192,10 +120,8 @@ void SHA1Transform(state, buffer)
/*
* SHA1Init - Initialize new context
*/
-void SHA1Init(context)
- SHA1_CTX *context;
+void SHA1Init(SHA1_CTX *context)
{
-
assert(context != 0);
/* SHA1 initialization constants */
@@ -211,12 +137,9 @@ void SHA1Init(context)
/*
* Run your data through this.
*/
-void SHA1Update(context, data, len)
- SHA1_CTX *context;
- const u_char *data;
- u_int len;
+void SHA1Update(SHA1_CTX *context, const uint8_t *data, unsigned int len)
{
- u_int i, j;
+ unsigned int i, j;
assert(context != 0);
assert(data != 0);
@@ -241,28 +164,26 @@ void SHA1Update(context, data, len)
/*
* Add padding and return the message digest.
*/
-void SHA1Final(digest, context)
- u_char digest[20];
- SHA1_CTX* context;
+void SHA1Final(uint8_t digest[20], SHA1_CTX *context)
{
- u_int i;
- u_char finalcount[8];
+ unsigned int i;
+ uint8_t finalcount[8];
assert(digest != 0);
assert(context != 0);
for (i = 0; i < 8; i++) {
- finalcount[i] = (u_char)((context->count[(i >= 4 ? 0 : 1)]
+ finalcount[i] = (uint8_t)((context->count[(i >= 4 ? 0 : 1)]
>> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
}
- SHA1Update(context, (const u_char *)"\200", 1);
+ SHA1Update(context, (const uint8_t *)"\200", 1);
while ((context->count[0] & 504) != 448)
- SHA1Update(context, (const u_char *)"\0", 1);
+ SHA1Update(context, (const uint8_t *)"\0", 1);
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
if (digest) {
for (i = 0; i < 20; i++)
- digest[i] = (u_char)
+ digest[i] = (uint8_t)
((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
}
}
diff --git a/libc/bionic/system_properties.c b/libc/bionic/system_properties.c
index caa5ca6..756ee3f 100644
--- a/libc/bionic/system_properties.c
+++ b/libc/bionic/system_properties.c
@@ -158,7 +158,10 @@ int __system_property_get(const char *name, char *value)
static int send_prop_msg(prop_msg *msg)
{
struct pollfd pollfds[1];
- struct sockaddr_un addr;
+ union {
+ struct sockaddr_un addr;
+ struct sockaddr addr_g;
+ } addr;
socklen_t alen;
size_t namelen;
int s;
@@ -172,11 +175,11 @@ static int send_prop_msg(prop_msg *msg)
memset(&addr, 0, sizeof(addr));
namelen = strlen(property_service_socket);
- strlcpy(addr.sun_path, property_service_socket, sizeof addr.sun_path);
- addr.sun_family = AF_LOCAL;
+ strlcpy(addr.addr.sun_path, property_service_socket, sizeof addr.addr.sun_path);
+ addr.addr.sun_family = AF_LOCAL;
alen = namelen + offsetof(struct sockaddr_un, sun_path) + 1;
- if(TEMP_FAILURE_RETRY(connect(s, (struct sockaddr *) &addr, alen)) < 0) {
+ if(TEMP_FAILURE_RETRY(connect(s, &addr.addr_g, alen)) < 0) {
close(s);
return result;
}
diff --git a/libc/include/errno.h b/libc/include/errno.h
index e1b15c0..d3b0506 100644
--- a/libc/include/errno.h
+++ b/libc/include/errno.h
@@ -45,6 +45,7 @@ __BEGIN_DECLS
extern int __set_errno(int error);
/* internal function returning the address of the thread-specific errno */
+__attribute__((const))
extern volatile int* __errno(void);
/* a macro expanding to the errno l-value */
diff --git a/libc/include/netinet/in6.h b/libc/include/netinet/in6.h
index 7f3286a..ba24b6c 100644
--- a/libc/include/netinet/in6.h
+++ b/libc/include/netinet/in6.h
@@ -31,28 +31,28 @@
#include <linux/in6.h>
#define IN6_IS_ADDR_UNSPECIFIED(a) \
- ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) == 0))
+ (((a)->s6_addr32[0] == 0) && \
+ ((a)->s6_addr32[1] == 0) && \
+ ((a)->s6_addr32[2] == 0) && \
+ ((a)->s6_addr32[3] == 0))
#define IN6_IS_ADDR_LOOPBACK(a) \
- ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1)))
+ (((a)->s6_addr32[0] == 0) && \
+ ((a)->s6_addr32[1] == 0) && \
+ ((a)->s6_addr32[2] == 0) && \
+ ((a)->s6_addr32[3] == ntohl(1)))
#define IN6_IS_ADDR_V4COMPAT(a) \
- ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1)))
+ (((a)->s6_addr32[0] == 0) && \
+ ((a)->s6_addr32[1] == 0) && \
+ ((a)->s6_addr32[2] == 0) && \
+ ((a)->s6_addr32[3] != 0) && \
+ ((a)->s6_addr32[3] != ntohl(1)))
#define IN6_IS_ADDR_V4MAPPED(a) \
- ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const uint32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff)))
+ (((a)->s6_addr32[0] == 0) && \
+ ((a)->s6_addr32[1] == 0) && \
+ ((a)->s6_addr32[2] == ntohl(0x0000ffff)))
#define IN6_IS_ADDR_LINKLOCAL(a) \
(((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80))
@@ -65,7 +65,7 @@
(((a)->s6_addr[0] & 0xfe) == 0xfc)
#define IN6_IS_ADDR_MULTICAST(a) \
- (((__const uint8_t *) (a))[0] == 0xff)
+ ((a)->s6_addr[0] == 0xff)
#define IPV6_ADDR_SCOPE_NODELOCAL 0x01
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 2015ac0..af0cc5f 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -146,6 +146,7 @@ void pthread_exit(void * retval);
int pthread_join(pthread_t thid, void ** ret_val);
int pthread_detach(pthread_t thid);
+__attribute__((const))
pthread_t pthread_self(void);
int pthread_equal(pthread_t one, pthread_t two);
diff --git a/libc/include/resolv.h b/libc/include/resolv.h
index 7c34012..221410d 100644
--- a/libc/include/resolv.h
+++ b/libc/include/resolv.h
@@ -40,7 +40,7 @@ __BEGIN_DECLS
struct res_state;
-extern struct __res_state *__res_state(void);
+extern struct __res_state *__res_state(void) __attribute__((const));
#define _res (*__res_state())
/* Base-64 functions - because some code expects it there */
diff --git a/libc/include/sha1.h b/libc/include/sha1.h
index f7ada46..adfa1fc 100644
--- a/libc/include/sha1.h
+++ b/libc/include/sha1.h
@@ -18,14 +18,14 @@
typedef struct {
uint32_t state[5];
uint32_t count[2];
- u_char buffer[64];
+ uint8_t buffer[64];
} SHA1_CTX;
__BEGIN_DECLS
-void SHA1Transform(uint32_t[5], const u_char[64]);
+void SHA1Transform(uint32_t[5], const uint8_t[64]);
void SHA1Init(SHA1_CTX *);
-void SHA1Update(SHA1_CTX *, const u_char *, u_int);
-void SHA1Final(u_char[SHA1_DIGEST_LENGTH], SHA1_CTX *);
+void SHA1Update(SHA1_CTX *, const uint8_t *, unsigned int);
+void SHA1Final(uint8_t[SHA1_DIGEST_LENGTH], SHA1_CTX *);
__END_DECLS
#endif /* _SYS_SHA1_H_ */
diff --git a/libc/include/string.h b/libc/include/string.h
index 06e2284..2ed74e8 100644
--- a/libc/include/string.h
+++ b/libc/include/string.h
@@ -224,6 +224,39 @@ size_t strlen(const char *s) {
return __strlen_chk(s, bos);
}
+__purefunc extern char* __strchr_real(const char *, int)
+ __asm__(__USER_LABEL_PREFIX__ "strchr");
+extern char* __strchr_chk(const char *, int, size_t);
+
+__BIONIC_FORTIFY_INLINE
+char* strchr(const char *s, int c) {
+ size_t bos = __builtin_object_size(s, 0);
+
+ // Compiler doesn't know destination size. Don't call __strchr_chk
+ if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+ return __strchr_real(s, c);
+ }
+
+ return __strchr_chk(s, c, bos);
+}
+
+__purefunc extern char* __strrchr_real(const char *, int)
+ __asm__(__USER_LABEL_PREFIX__ "strrchr");
+extern char* __strrchr_chk(const char *, int, size_t);
+
+__BIONIC_FORTIFY_INLINE
+char* strrchr(const char *s, int c) {
+ size_t bos = __builtin_object_size(s, 0);
+
+ // Compiler doesn't know destination size. Don't call __strrchr_chk
+ if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+ return __strrchr_real(s, c);
+ }
+
+ return __strrchr_chk(s, c, bos);
+}
+
+
#endif /* defined(__BIONIC_FORTIFY_INLINE) */
__END_DECLS
diff --git a/libc/kernel/arch-arm/asm/unistd.h b/libc/kernel/arch-arm/asm/unistd.h
index 454ed89..b3d75ca 100644
--- a/libc/kernel/arch-arm/asm/unistd.h
+++ b/libc/kernel/arch-arm/asm/unistd.h
@@ -466,7 +466,7 @@
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
-#ifdef __ARM_EABI__
+#if defined(__ARM_EABI__) && !defined(__KERNEL__)
#undef __NR_time
#undef __NR_umount
#undef __NR_stime
diff --git a/libc/kernel/common/linux/android_pmem.h b/libc/kernel/common/linux/android_pmem.h
index 8c605e4..f463807 100644
--- a/libc/kernel/common/linux/android_pmem.h
+++ b/libc/kernel/common/linux/android_pmem.h
@@ -29,6 +29,11 @@
#define PMEM_CONNECT _IOW(PMEM_IOCTL_MAGIC, 6, unsigned int)
#define PMEM_GET_TOTAL_SIZE _IOW(PMEM_IOCTL_MAGIC, 7, unsigned int)
#define PMEM_CACHE_FLUSH _IOW(PMEM_IOCTL_MAGIC, 8, unsigned int)
+
+#define PMEM_CLEAN_INV_CACHES _IOW(PMEM_IOCTL_MAGIC, 11, unsigned int)
+
+#define PMEM_ALLOCATE_ALIGNED _IOW(PMEM_IOCTL_MAGIC, 15, unsigned int)
+
struct android_pmem_platform_data
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
{
@@ -46,4 +51,16 @@ struct pmem_region {
unsigned long len;
};
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+
+struct pmem_addr {
+ unsigned long vaddr;
+ unsigned long offset;
+ unsigned long length;
+};
+
+struct pmem_allocation {
+ unsigned long size;
+ unsigned int align;
+};
+
#endif
diff --git a/libc/kernel/common/linux/ashmem.h b/libc/kernel/common/linux/ashmem.h
index e402e4e..a24d75a 100644
--- a/libc/kernel/common/linux/ashmem.h
+++ b/libc/kernel/common/linux/ashmem.h
@@ -47,4 +47,6 @@ struct ashmem_pin {
#define ASHMEM_GET_PIN_STATUS _IO(__ASHMEMIOC, 9)
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ASHMEM_PURGE_ALL_CACHES _IO(__ASHMEMIOC, 10)
+#define ASHMEM_CACHE_FLUSH_RANGE _IO(__ASHMEMIOC, 11)
+
#endif
diff --git a/libc/netbsd/gethnamaddr.c b/libc/netbsd/gethnamaddr.c
index 9a9f6e2..055e9f2 100644
--- a/libc/netbsd/gethnamaddr.c
+++ b/libc/netbsd/gethnamaddr.c
@@ -653,14 +653,14 @@ gethostbyaddr(const void *addr,
assert(addr != NULL);
if (af == AF_INET6 && len == IN6ADDRSZ &&
- (IN6_IS_ADDR_LINKLOCAL((const struct in6_addr *)(const void *)uaddr) ||
- IN6_IS_ADDR_SITELOCAL((const struct in6_addr *)(const void *)uaddr))) {
+ (IN6_IS_ADDR_LINKLOCAL((const struct in6_addr *)addr) ||
+ IN6_IS_ADDR_SITELOCAL((const struct in6_addr *)addr))) {
h_errno = HOST_NOT_FOUND;
return NULL;
}
if (af == AF_INET6 && len == IN6ADDRSZ &&
- (IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)(const void *)uaddr) ||
- IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)(const void *)uaddr))) {
+ (IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)addr) ||
+ IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)addr))) {
/* Unmap. */
addr += IN6ADDRSZ - INADDRSZ;
uaddr += IN6ADDRSZ - INADDRSZ;
diff --git a/libc/netbsd/net/getaddrinfo.c b/libc/netbsd/net/getaddrinfo.c
index 326b09c..bd29c5a 100644
--- a/libc/netbsd/net/getaddrinfo.c
+++ b/libc/netbsd/net/getaddrinfo.c
@@ -411,7 +411,10 @@ android_getaddrinfo_proxy(
{
int sock;
const int one = 1;
- struct sockaddr_un proxy_addr;
+ union {
+ struct sockaddr_un un;
+ struct sockaddr generic;
+ } proxy_addr;
const char* cache_mode = getenv("ANDROID_DNS_MODE");
FILE* proxy = NULL;
int success = 0;
@@ -452,12 +455,12 @@ android_getaddrinfo_proxy(
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
memset(&proxy_addr, 0, sizeof(proxy_addr));
- proxy_addr.sun_family = AF_UNIX;
- strlcpy(proxy_addr.sun_path, "/dev/socket/dnsproxyd",
- sizeof(proxy_addr.sun_path));
+ proxy_addr.un.sun_family = AF_UNIX;
+ strlcpy(proxy_addr.un.sun_path, "/dev/socket/dnsproxyd",
+ sizeof(proxy_addr.un.sun_path));
if (TEMP_FAILURE_RETRY(connect(sock,
- (const struct sockaddr*) &proxy_addr,
- sizeof(proxy_addr))) != 0) {
+ &proxy_addr.generic,
+ sizeof(proxy_addr.un))) != 0) {
close(sock);
return -1;
}
@@ -1547,7 +1550,7 @@ _get_scope(const struct sockaddr *addr)
/* RFC 4380, section 2.6 */
#define IN6_IS_ADDR_TEREDO(a) \
- ((*(const uint32_t *)(const void *)(&(a)->s6_addr[0]) == ntohl(0x20010000)))
+ (((a)->s6_addr32[0]) == ntohl(0x20010000))
/* RFC 3056, section 2. */
#define IN6_IS_ADDR_6TO4(a) \
diff --git a/libc/netbsd/net/getnameinfo.c b/libc/netbsd/net/getnameinfo.c
index d8ac037..da9d7e3 100644
--- a/libc/netbsd/net/getnameinfo.c
+++ b/libc/netbsd/net/getnameinfo.c
@@ -147,7 +147,10 @@ android_gethostbyaddr_proxy(char* nameBuf, size_t nameBufLen, const void *addr,
int sock;
const int one = 1;
- struct sockaddr_un proxy_addr;
+ union {
+ struct sockaddr_un un;
+ struct sockaddr generic;
+ } proxy_addr;
const char* cache_mode = getenv("ANDROID_DNS_MODE");
FILE* proxy = NULL;
int result = -1;
@@ -175,11 +178,11 @@ android_gethostbyaddr_proxy(char* nameBuf, size_t nameBufLen, const void *addr,
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
memset(&proxy_addr, 0, sizeof(proxy_addr));
- proxy_addr.sun_family = AF_UNIX;
- strlcpy(proxy_addr.sun_path, "/dev/socket/dnsproxyd",
- sizeof(proxy_addr.sun_path));
- if (TEMP_FAILURE_RETRY(connect(sock, (const struct sockaddr*) (void*) &proxy_addr,
- sizeof(proxy_addr))) != 0) {
+ proxy_addr.un.sun_family = AF_UNIX;
+ strlcpy(proxy_addr.un.sun_path, "/dev/socket/dnsproxyd",
+ sizeof(proxy_addr.un.sun_path));
+ if (TEMP_FAILURE_RETRY(connect(sock, &proxy_addr.generic,
+ sizeof(proxy_addr.un))) != 0) {
close(sock);
return -1;
}
diff --git a/libc/netbsd/resolv/res_send.c b/libc/netbsd/resolv/res_send.c
index f3ee539..028ffaf 100644
--- a/libc/netbsd/resolv/res_send.c
+++ b/libc/netbsd/resolv/res_send.c
@@ -404,7 +404,10 @@ res_nsend(res_state statp,
*/
if (EXT(statp).nscount != 0) {
int needclose = 0;
- struct sockaddr_storage peer;
+ union {
+ struct sockaddr_storage storage;
+ struct sockaddr generic;
+ } peer;
socklen_t peerlen;
if (EXT(statp).nscount != statp->nscount)
@@ -420,13 +423,13 @@ res_nsend(res_state statp,
if (EXT(statp).nssocks[ns] == -1)
continue;
- peerlen = sizeof(peer);
+ peerlen = sizeof(peer.storage);
if (getpeername(EXT(statp).nssocks[ns],
- (struct sockaddr *)(void *)&peer, &peerlen) < 0) {
+ &peer.generic, &peerlen) < 0) {
needclose++;
break;
}
- if (!sock_eq((struct sockaddr *)(void *)&peer,
+ if (!sock_eq(&peer.generic,
get_nsaddr(statp, (size_t)ns))) {
needclose++;
break;
@@ -750,12 +753,15 @@ send_vc(res_state statp,
/* Are we still talking to whom we want to talk to? */
if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
- struct sockaddr_storage peer;
- socklen_t size = sizeof peer;
+ union {
+ struct sockaddr_storage storage;
+ struct sockaddr generic;
+ } peer;
+ socklen_t size = sizeof peer.storage;
if (getpeername(statp->_vcsock,
- (struct sockaddr *)(void *)&peer, &size) < 0 ||
- !sock_eq((struct sockaddr *)(void *)&peer, nsap)) {
+ &peer.generic, &size) < 0 ||
+ !sock_eq(&peer.generic, nsap)) {
res_nclose(statp);
statp->_flags &= ~RES_F_VC;
}
@@ -1034,7 +1040,10 @@ send_dg(res_state statp,
int nsaplen;
struct timespec now, timeout, finish;
fd_set dsmask;
- struct sockaddr_storage from;
+ union {
+ struct sockaddr_storage storage;
+ struct sockaddr generic;
+ } from;
socklen_t fromlen;
int resplen, seconds, n, s;
@@ -1126,9 +1135,9 @@ retry:
return (0);
}
errno = 0;
- fromlen = sizeof(from);
+ fromlen = sizeof(from.storage);
resplen = recvfrom(s, (char*)ans, (size_t)anssiz,0,
- (struct sockaddr *)(void *)&from, &fromlen);
+ &from.generic, &fromlen);
if (resplen <= 0) {
Perror(statp, stderr, "recvfrom", errno);
res_nclose(statp);
@@ -1162,7 +1171,7 @@ retry:
goto retry;
}
if (!(statp->options & RES_INSECURE1) &&
- !res_ourserver_p(statp, (struct sockaddr *)(void *)&from)) {
+ !res_ourserver_p(statp, &from.generic)) {
/*
* response from wrong server? ignore it.
* XXX - potential security hazard could
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
index 275c1c9..380c143 100644
--- a/libc/private/bionic_atomic_arm.h
+++ b/libc/private/bionic_atomic_arm.h
@@ -124,6 +124,11 @@ __bionic_memory_barrier(void)
}
#endif /* !ANDROID_SMP */
+/* LDREX/STREX routines broken on ARMv6 */
+# if __ARM_ARCH__ == 6
+# define BROKEN_REX
+# endif
+
/* Compare-and-swap, without any explicit barriers. Note that this functions
* returns 0 on success, and 1 on failure. The opposite convention is typically
* used on other platforms.
@@ -135,7 +140,7 @@ __bionic_memory_barrier(void)
*
* LDREX/STREX are only available starting from ARMv6
*/
-#ifdef __ARM_HAVE_LDREX_STREX
+#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
@@ -182,7 +187,7 @@ __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
* ARMv6+ => use LDREX/STREX
* < ARMv6 => use SWP instead.
*/
-#ifdef __ARM_HAVE_LDREX_STREX
+#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
@@ -216,7 +221,7 @@ __bionic_swap(int32_t new_value, volatile int32_t* ptr)
/* Atomic increment - without any barriers
* This returns the old value
*/
-#ifdef __ARM_HAVE_LDREX_STREX
+#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
@@ -250,7 +255,7 @@ __bionic_atomic_inc(volatile int32_t* ptr)
/* Atomic decrement - without any barriers
* This returns the old value.
*/
-#ifdef __ARM_HAVE_LDREX_STREX
+#if defined(__ARM_HAVE_LDREX_STREX) && !defined(BROKEN_REX)
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 4658866..2456ebb 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -100,7 +100,9 @@ extern int __set_tls(void *ptr);
* C library, because we don't know where the corresponding code
* is going to run.
*/
-# ifdef LIBC_STATIC
+# if defined(LIBC_STATIC) || \
+ (defined(__ARM_ARCH_6__) && defined(HAVE_ARM_TLS_REGISTER) && \
+ !defined(__ARM_ARCH_6T2__))
/* Use the kernel helper in static C library. */
typedef volatile void* (__kernel_get_tls_t)(void);
@@ -111,6 +113,12 @@ extern int __set_tls(void *ptr);
* Note that HAVE_ARM_TLS_REGISTER is build-specific
* (it must match your kernel configuration)
*/
+# ifdef HAVE_TEGRA_ERRATA_657451
+# define __munge_tls(_v) ( ((_v)&~((1ul<<20)|1ul)) | (((_v)&0x1)<<20) )
+# else
+# define __munge_tls(_v) (_v)
+#  endif
+
# ifdef HAVE_ARM_TLS_REGISTER
/* We can read the address directly from a coprocessor
* register, which avoids touching the data cache
@@ -119,6 +127,7 @@ extern int __set_tls(void *ptr);
# define __get_tls() \
({ register unsigned int __val asm("r0"); \
asm ("mrc p15, 0, r0, c13, c0, 3" : "=r"(__val) ); \
+ __val = __munge_tls(__val); \
(volatile void*)__val; })
# else /* !HAVE_ARM_TLS_REGISTER */
/* The kernel provides the address of the TLS at a fixed
diff --git a/libc/private/logd.h b/libc/private/logd.h
index c81a91a..26878ba 100644
--- a/libc/private/logd.h
+++ b/libc/private/logd.h
@@ -29,6 +29,7 @@
#define _ANDROID_BIONIC_LOGD_H
#include <stdarg.h>
+#include <stdint.h>
#define BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW 80100
#define BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW 80105
diff --git a/libc/string/strchr.c b/libc/string/strchr.c
index 9b4332c..44516ef 100644
--- a/libc/string/strchr.c
+++ b/libc/string/strchr.c
@@ -29,11 +29,17 @@
*/
#include <string.h>
+#include <private/logd.h>
char *
-strchr(const char *p, int ch)
+__strchr_chk(const char *p, int ch, size_t s_len)
{
- for (;; ++p) {
+ for (;; ++p, s_len--) {
+ if (s_len == 0) {
+ __libc_android_log_print(ANDROID_LOG_FATAL, "libc",
+ "*** FORTIFY_SOURCE strchr read beyond buffer ***\n");
+ abort();
+ }
if (*p == (char) ch)
return((char *)p);
if (!*p)
@@ -41,3 +47,8 @@ strchr(const char *p, int ch)
}
/* NOTREACHED */
}
+
+char *
+strchr(const char *p, int ch) {
+ return __strchr_chk(p, ch, (size_t) -1);
+}
diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c
index 10c07e6..fc3dc4e 100644
--- a/libc/string/strrchr.c
+++ b/libc/string/strrchr.c
@@ -29,13 +29,19 @@
*/
#include <string.h>
+#include <private/logd.h>
char *
-strrchr(const char *p, int ch)
+__strrchr_chk(const char *p, int ch, size_t s_len)
{
char *save;
- for (save = NULL;; ++p) {
+ for (save = NULL;; ++p, s_len--) {
+ if (s_len == 0) {
+ __libc_android_log_print(ANDROID_LOG_FATAL, "libc",
+ "*** FORTIFY_SOURCE strrchr read beyond buffer ***\n");
+ abort();
+ }
if (*p == (char) ch)
save = (char *)p;
if (!*p)
@@ -43,3 +49,9 @@ strrchr(const char *p, int ch)
}
/* NOTREACHED */
}
+
+char *
+strrchr(const char *p, int ch)
+{
+ return __strrchr_chk(p, ch, (size_t) -1);
+}
diff --git a/libc/tools/zoneinfo/ZoneCompactor.java b/libc/tools/zoneinfo/ZoneCompactor.java
index b657748..cc77c94 100644
--- a/libc/tools/zoneinfo/ZoneCompactor.java
+++ b/libc/tools/zoneinfo/ZoneCompactor.java
@@ -55,11 +55,13 @@ public class ZoneCompactor {
InputStream in = new FileInputStream(inFile);
byte[] buf = new byte[8192];
+ int length = 0;
while (true) {
int nbytes = in.read(buf);
if (nbytes == -1) {
break;
}
+ length += nbytes;
out.write(buf, 0, nbytes);
byte[] nret = new byte[ret.length + nbytes];
@@ -67,6 +69,8 @@ public class ZoneCompactor {
System.arraycopy(buf, 0, nret, ret.length, nbytes);
ret = nret;
}
+ if (length%4 != 0)
+ out.write(new byte[] {00,00,00,00}, 0, 4 - length % 4);
out.flush();
return ret;
}
@@ -105,6 +109,9 @@ public class ZoneCompactor {
lengths.put(s, new Integer((int)length));
start += length;
+ if (start % 4 != 0)
+ start += 4 - start % 4;
+
byte[] data = copyFile(f, zoneInfo);
TimeZone tz = ZoneInfo.make(s, data);
diff --git a/libc/tools/zoneinfo/generate b/libc/tools/zoneinfo/generate
index ab2617f..7017e90 100755
--- a/libc/tools/zoneinfo/generate
+++ b/libc/tools/zoneinfo/generate
@@ -92,7 +92,7 @@ def upgrade_to(ftp, filename):
subprocess.check_call(['javac', '-d', '.',
'%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir,
'%s/ZoneInfo.java' % bionic_libc_tools_zoneinfo_dir])
- subprocess.check_call(['java', 'ZoneCompactor', 'setup', 'data'])
+ subprocess.check_call(['java', '-classpath', '.', 'ZoneCompactor', 'setup', 'data'])
print 'Updating bionic from %s to %s...' % (current_tzdata_version(), version)
# Move the .dat and .idx files...
@@ -116,7 +116,8 @@ ftp.cwd('tz/releases')
tzdata_filenames = []
for filename in ftp.nlst():
if filename.startswith('tzdata20'):
- tzdata_filenames.append(filename)
+ if filename.endswith('tar.gz'):
+ tzdata_filenames.append(filename)
tzdata_filenames.sort()
# If you're several releases behind, we'll walk you through the upgrades one by one.
diff --git a/libc/unistd/getopt_long.c b/libc/unistd/getopt_long.c
index dbdf01a..0b8181a 100644
--- a/libc/unistd/getopt_long.c
+++ b/libc/unistd/getopt_long.c
@@ -100,12 +100,12 @@ static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
-static const char recargchar[] = "option requires an argument -- %c";
-static const char recargstring[] = "option requires an argument -- %s";
-static const char ambig[] = "ambiguous option -- %.*s";
-static const char noarg[] = "option doesn't take an argument -- %.*s";
-static const char illoptchar[] = "unknown option -- %c";
-static const char illoptstring[] = "unknown option -- %s";
+static const char recargchar[] = "option requires an argument -- %c\n";
+static const char recargstring[] = "option requires an argument -- %s\n";
+static const char ambig[] = "ambiguous option -- %.*s\n";
+static const char noarg[] = "option doesn't take an argument -- %.*s\n";
+static const char illoptchar[] = "unknown option -- %c\n";
+static const char illoptstring[] = "unknown option -- %s\n";
/*
* Compute the greatest common divisor of a and b.
diff --git a/libc/zoneinfo/zoneinfo.dat b/libc/zoneinfo/zoneinfo.dat
index cb0507a..cd4b4cc 100644
--- a/libc/zoneinfo/zoneinfo.dat
+++ b/libc/zoneinfo/zoneinfo.dat
Binary files differ
diff --git a/libc/zoneinfo/zoneinfo.idx b/libc/zoneinfo/zoneinfo.idx
index c93b637..1f5f538 100644
--- a/libc/zoneinfo/zoneinfo.idx
+++ b/libc/zoneinfo/zoneinfo.idx
Binary files differ
diff --git a/libc/zoneinfo/zoneinfo.version b/libc/zoneinfo/zoneinfo.version
index 73bb417..0bbfa63 100644
--- a/libc/zoneinfo/zoneinfo.version
+++ b/libc/zoneinfo/zoneinfo.version
@@ -1 +1 @@
-2012h
+2012j
diff --git a/libm/Android.mk b/libm/Android.mk
index 9c88798..a28f1b8 100644
--- a/libm/Android.mk
+++ b/libm/Android.mk
@@ -72,7 +72,6 @@ libm_common_src_files:= \
src/s_ceill.c \
src/s_copysign.c \
src/s_copysignf.c \
- src/s_cos.c \
src/s_cosf.c \
src/s_erf.c \
src/s_erff.c \
@@ -132,7 +131,6 @@ libm_common_src_files:= \
src/s_signgam.c \
src/s_significand.c \
src/s_significandf.c \
- src/s_sin.c \
src/s_sinf.c \
src/s_tan.c \
src/s_tanf.c \
@@ -162,6 +160,30 @@ ifeq ($(TARGET_ARCH),arm)
src/s_scalbnf.c \
src/e_sqrtf.c
+ ifeq ($(TARGET_USE_KRAIT_BIONIC_OPTIMIZATION),true)
+ libm_common_src_files += \
+ arm/e_pow.S \
+ arm/s_cos.S \
+ arm/s_sin.S
+ libm_common_cflags += -DKRAIT_NEON_OPTIMIZATION -fno-if-conversion
+ else
+ libm_common_src_files += \
+ src/s_cos.c \
+ src/s_sin.c
+ endif
+
+ ifeq ($(TARGET_USE_SPARROW_BIONIC_OPTIMIZATION),true)
+ libm_common_src_files += \
+ arm/e_pow.S
+ libm_common_cflags += -DSPARROW_NEON_OPTIMIZATION
+ endif
+
+ ifeq ($(TARGET_USE_SCORPION_BIONIC_OPTIMIZATION),true)
+ libm_common_src_files += \
+ arm/e_pow.S
+ libm_common_cflags += -DSCORPION_NEON_OPTIMIZATION
+ endif
+
libm_common_includes = $(LOCAL_PATH)/arm
endif
@@ -182,7 +204,9 @@ ifeq ($(TARGET_ARCH),mips)
src/s_scalbln.c \
src/s_scalbn.c \
src/s_scalbnf.c \
- src/e_sqrtf.c
+ src/e_sqrtf.c \
+ src/s_sin.c \
+ src/s_cos.c
libm_common_includes = $(LOCAL_PATH)/mips
# Need to build *rint* functions
@@ -201,6 +225,8 @@ LOCAL_ARM_MODE := arm
LOCAL_C_INCLUDES += $(libm_common_includes)
LOCAL_CFLAGS := $(libm_common_cflags)
+
+
LOCAL_MODULE:= libm
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
@@ -221,6 +247,8 @@ LOCAL_ARM_MODE := arm
LOCAL_C_INCLUDES += $(libm_common_includes)
LOCAL_CFLAGS := $(libm_common_cflags)
+
+
LOCAL_MODULE:= libm
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S
new file mode 100644
index 0000000..1e328f8
--- /dev/null
+++ b/libm/arm/e_pow.S
@@ -0,0 +1,443 @@
+@ Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are
+@ met:
+@ * Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@ * Redistributions in binary form must reproduce the above
+@ copyright notice, this list of conditions and the following
+@ disclaimer in the documentation and/or other materials provided
+@ with the distribution.
+@ * Neither the name of Code Aurora Forum, Inc. nor the names of its
+@ contributors may be used to endorse or promote products derived
+@ from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+@ Values which exist the program lifetime:
+#define HIGH_WORD_MASK d31
+#define EXPONENT_MASK d30
+#define int_1 d29
+#define double_1 d28
+@ sign and 2^int_n fixup:
+#define expadjustment d7
+#define literals r10
+@ Values which exist within both polynomial implementations:
+#define int_n d2
+#define int_n_low s4
+#define int_n_high s5
+#define double_n d3
+#define k1 d27
+#define k2 d26
+#define k3 d25
+#define k4 d24
+@ Values which cross the boundaries between polynomial implementations:
+#define ss d16
+#define ss2 d17
+#define ss4 d18
+#define Result d0
+#define Return_hw r1
+#define Return_lw r0
+#define ylg2x d0
+@ Intermediate values only needed sometimes:
+@ initial (sorted in approximate order of availability for overwriting):
+#define x_hw r1
+#define x_lw r0
+#define y_hw r3
+#define y_lw r2
+#define x d0
+#define bp d4
+#define y d1
+@ log series:
+#define u d19
+#define v d20
+#define lg2coeff d21
+#define bpa d5
+#define bpb d3
+#define lg2const d6
+#define xmantissa r8
+#define twoto1o5 r4
+#define twoto3o5 r5
+#define ix r6
+#define iEXP_MASK r7
+@ exp input setup:
+#define twoto1o8mask d3
+#define twoto1o4mask d4
+#define twoto1o2mask d1
+#define ylg2x_round_offset d16
+#define ylg2x_temp d17
+#define yn_temp d18
+#define yn_round_offset d19
+#define ln2 d5
+@ Careful, overwriting HIGH_WORD_MASK, reset it if you need it again ...
+#define rounded_exponent d31
+@ exp series:
+#define k5 d23
+#define k6 d22
+#define k7 d21
+#define k8 d20
+#define ss3 d19
+@ overwrite double_1 (we're done with it by now)
+#define k0 d28
+#define twoto1o4 d6
+
+@instructions that gas doesn't like to encode correctly:
+#define vmov_f64 fconstd
+#define vmov_f32 fconsts
+#define vmovne_f64 fconstdne
+
+ENTRY(pow_neon)
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+ @ ARM ABI has inputs coming in via r registers, lets move to a d register
+ vmov x, x_lw, x_hw
+#endif
+ push {r4, r5, r6, r7, r8, r9, r10, lr}
+
+ @ pre-staged bp values
+ vldr bpa, .LbpA
+ vldr bpb, .LbpB
+ @ load two fifths into constant term in case we need it due to offsets
+ vldr lg2const, .Ltwofifths
+
+ @ bp is initially 1.0, may adjust later based on x value
+ vmov_f64 bp, #0x70
+
+ @ extract the mantissa from x for scaled value comparisons
+ lsl xmantissa, x_hw, #12
+
+ @ twoto1o5 = 2^(1/5) (input bracketing)
+ movw twoto1o5, #0x186c
+ movt twoto1o5, #0x2611
+ @ twoto3o5 = 2^(3/5) (input bracketing)
+ movw twoto3o5, #0x003b
+ movt twoto3o5, #0x8406
+
+ @ finish extracting xmantissa
+ orr xmantissa, xmantissa, x_lw, lsr #20
+
+ @ begin preparing a mask for normalization
+ vmov.i64 HIGH_WORD_MASK, #0xffffffff00000000
+
+ @ double_1 = (double) 1.0
+ vmov_f64 double_1, #0x70
+
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+ @ move y from r registers to a d register
+ vmov y, y_lw, y_hw
+#endif
+
+ cmp xmantissa, twoto1o5
+
+ vshl.i64 EXPONENT_MASK, HIGH_WORD_MASK, #20
+ vshr.u64 int_1, HIGH_WORD_MASK, #63
+
+ adr literals, .LliteralTable
+
+ bhi .Lxgt2to1over5
+ @ zero out lg2 constant term if don't offset our input
+ vsub.f64 lg2const, lg2const, lg2const
+ b .Lxle2to1over5
+
+.Lxgt2to1over5:
+ @ if normalized x > 2^(1/5), bp = 1 + (2^(2/5)-1) = 2^(2/5)
+ vadd.f64 bp, bp, bpa
+
+.Lxle2to1over5:
+ @ will need ln2 for various things
+ vldr ln2, .Lln2
+
+ cmp xmantissa, twoto3o5
+@@@@ X Value Normalization @@@@
+
+ @ ss = abs(x) 2^(-1024)
+ vbic.i64 ss, x, EXPONENT_MASK
+
+ @ N = (floor(log2(x)) + 0x3ff) * 2^52
+ vand.i64 int_n, x, EXPONENT_MASK
+
+ bls .Lxle2to3over5
+ @ if normalized x > 2^(3/5), bp = 2^(2/5) + (2^(4/5) - 2^(2/5) = 2^(4/5)
+ vadd.f64 bp, bp, bpb
+ vadd.f64 lg2const, lg2const, lg2const
+
+.Lxle2to3over5:
+
+ @ load log2 polynomial series constants
+ vldm literals!, {k4, k3, k2, k1}
+
+ @ s = abs(x) 2^(-floor(log2(x))) (normalize abs(x) to around 1)
+ vorr.i64 ss, ss, double_1
+
+@@@@ 3/2 (Log(bp(1+s)/(1-s))) input computation (s = (x-bp)/(x+bp)) @@@@
+
+ vsub.f64 u, ss, bp
+ vadd.f64 v, ss, bp
+
+ @ s = (x-1)/(x+1)
+ vdiv.f64 ss, u, v
+
+ @ load 2/(3log2) into lg2coeff
+ vldr lg2coeff, .Ltwooverthreeln2
+
+ @ N = floor(log2(x)) * 2^52
+ vsub.i64 int_n, int_n, double_1
+
+@@@@ 3/2 (Log(bp(1+s)/(1-s))) polynomial series @@@@
+
+ @ ss2 = ((x-dp)/(x+dp))^2
+ vmul.f64 ss2, ss, ss
+ @ ylg2x = 3.0
+ vmov_f64 ylg2x, #8
+ vmul.f64 ss4, ss2, ss2
+
+ @ todo: useful later for two-way clamp
+ vmul.f64 lg2coeff, lg2coeff, y
+
+ @ N = floor(log2(x))
+ vshr.s64 int_n, int_n, #52
+
+ @ k3 = ss^2 * L4 + L3
+ vmla.f64 k3, ss2, k4
+
+ @ k1 = ss^2 * L2 + L1
+ vmla.f64 k1, ss2, k2
+
+ @ scale ss by 2/(3 ln 2)
+ vmul.f64 lg2coeff, ss, lg2coeff
+
+ @ ylg2x = 3.0 + s^2
+ vadd.f64 ylg2x, ylg2x, ss2
+
+ vcvt.f64.s32 double_n, int_n_low
+
+ @ k1 = s^4 (s^2 L4 + L3) + s^2 L2 + L1
+ vmla.f64 k1, ss4, k3
+
+ @ add in constant term
+ vadd.f64 double_n, lg2const
+
+ @ ylg2x = 3.0 + s^2 + s^4 (s^4 (s^2 L4 + L3) + s^2 L2 + L1)
+ vmla.f64 ylg2x, ss4, k1
+
+ @ ylg2x = y 2 s / (3 ln(2)) (3.0 + s^2 + s^4 (s^4(s^2 L4 + L3) + s^2 L2 + L1)
+ vmul.f64 ylg2x, lg2coeff, ylg2x
+
+@@@@ Compute input to Exp(s) (s = y(n + log2(x)) - (floor(8 yn + 1)/8 + floor(8 ylog2(x) + 1)/8) @@@@@
+
+ @ mask to extract bit 1 (2^-2 from our fixed-point representation)
+ vshl.u64 twoto1o4mask, int_1, #1
+
+ @ double_n = y * n
+ vmul.f64 double_n, double_n, y
+
+ @ Load 2^(1/4) for later computations
+ vldr twoto1o4, .Ltwoto1o4
+
+ @ either add or subtract one based on the sign of double_n and ylg2x
+ vshr.s64 ylg2x_round_offset, ylg2x, #62
+ vshr.s64 yn_round_offset, double_n, #62
+
+ @ move unmodified y*lg2x into temp space
+ vmov ylg2x_temp, ylg2x
+ @ compute floor(8 y * n + 1)/8
+ @ and floor(8 y (log2(x)) + 1)/8
+ vcvt.s32.f64 ylg2x, ylg2x, #3
+ @ move unmodified y*n into temp space
+ vmov yn_temp, double_n
+ vcvt.s32.f64 double_n, double_n, #3
+
+ @ load exp polynomial series constants
+ vldm literals!, {k8, k7, k6, k5, k4, k3, k2, k1}
+
+ @ mask to extract bit 2 (2^-1 from our fixed-point representation)
+ vshl.u64 twoto1o2mask, int_1, #2
+
+ @ make rounding offsets either 1 or -1 instead of 0 or -2
+ vorr.u64 ylg2x_round_offset, ylg2x_round_offset, int_1
+ vorr.u64 yn_round_offset, yn_round_offset, int_1
+
+ @ round up to the nearest 1/8th
+ vadd.s32 ylg2x, ylg2x, ylg2x_round_offset
+ vadd.s32 double_n, double_n, yn_round_offset
+
+ @ clear out round-up bit for y log2(x)
+ vbic.s32 ylg2x, ylg2x, int_1
+ @ clear out round-up bit for yn
+ vbic.s32 double_n, double_n, int_1
+ @ add together the (fixed precision) rounded parts
+ vadd.s64 rounded_exponent, double_n, ylg2x
+ @ turn int_n into a double with value 2^int_n
+ vshl.i64 int_n, rounded_exponent, #49
+ @ compute masks for 2^(1/4) and 2^(1/2) fixups for fractional part of fixed-precision rounded values:
+ vand.u64 twoto1o4mask, twoto1o4mask, rounded_exponent
+ vand.u64 twoto1o2mask, twoto1o2mask, rounded_exponent
+
+ @ convert back into floating point, double_n now holds (double) floor(8 y * n + 1)/8
+ @ ylg2x now holds (double) floor(8 y * log2(x) + 1)/8
+ vcvt.f64.s32 ylg2x, ylg2x, #3
+ vcvt.f64.s32 double_n, double_n, #3
+
+ @ put the 2 bit (0.5) through the roof of twoto1o2mask (make it 0x0 or 0xffffffffffffffff)
+ vqshl.u64 twoto1o2mask, twoto1o2mask, #62
+ @ put the 1 bit (0.25) through the roof of twoto1o4mask (make it 0x0 or 0xffffffffffffffff)
+ vqshl.u64 twoto1o4mask, twoto1o4mask, #63
+
+ @ center y*log2(x) fractional part between -0.125 and 0.125 by subtracting (double) floor(8 y * log2(x) + 1)/8
+ vsub.f64 ylg2x_temp, ylg2x_temp, ylg2x
+ @ center y*n fractional part between -0.125 and 0.125 by subtracting (double) floor(8 y * n + 1)/8
+ vsub.f64 yn_temp, yn_temp, double_n
+
+ @ Add fractional parts of yn and y log2(x) together
+ vadd.f64 ss, ylg2x_temp, yn_temp
+
+ @ Result = 1.0 (offset for exp(s) series)
+ vmov_f64 Result, #0x70
+
+ @ multiply fractional part of y * log2(x) by ln(2)
+ vmul.f64 ss, ln2, ss
+
+@@@@ 10th order polynomial series for Exp(s) @@@@
+
+ @ ss2 = (ss)^2
+ vmul.f64 ss2, ss, ss
+
+ @ twoto1o2mask = twoto1o2mask & twoto1o4
+ vand.u64 twoto1o2mask, twoto1o2mask, twoto1o4
+ @ twoto1o2mask = twoto1o2mask & twoto1o4
+ vand.u64 twoto1o4mask, twoto1o4mask, twoto1o4
+
+ @ Result = 1.0 + ss
+ vadd.f64 Result, Result, ss
+
+ @ k7 = ss k8 + k7
+ vmla.f64 k7, ss, k8
+
+ @ ss4 = (ss*ss) * (ss*ss)
+ vmul.f64 ss4, ss2, ss2
+
+ @ twoto1o2mask = twoto1o2mask | (double) 1.0 - results in either 1.0 or 2^(1/4) in twoto1o2mask
+ vorr.u64 twoto1o2mask, twoto1o2mask, double_1
+ @ twoto1o2mask = twoto1o4mask | (double) 1.0 - results in either 1.0 or 2^(1/4) in twoto1o4mask
+ vorr.u64 twoto1o4mask, twoto1o4mask, double_1
+
+ @ TODO: should setup sign here, expadjustment = 1.0
+ vmov_f64 expadjustment, #0x70
+
+ @ ss3 = (ss*ss) * ss
+ vmul.f64 ss3, ss2, ss
+
+ @ k0 = 1/2 (first non-unity coefficient)
+ vmov_f64 k0, #0x60
+
+ @ Mask out non-exponent bits to make sure we have just 2^int_n
+ vand.i64 int_n, int_n, EXPONENT_MASK
+
+ @ square twoto1o2mask to get 1.0 or 2^(1/2)
+ vmul.f64 twoto1o2mask, twoto1o2mask, twoto1o2mask
+ @ multiply twoto2o4mask into the exponent output adjustment value
+ vmul.f64 expadjustment, expadjustment, twoto1o4mask
+
+ @ k5 = ss k6 + k5
+ vmla.f64 k5, ss, k6
+
+ @ k3 = ss k4 + k3
+ vmla.f64 k3, ss, k4
+
+ @ k1 = ss k2 + k1
+ vmla.f64 k1, ss, k2
+
+ @ multiply twoto1o2mask into exponent output adjustment value
+ vmul.f64 expadjustment, expadjustment, twoto1o2mask
+
+ @ k5 = ss^2 ( ss k8 + k7 ) + ss k6 + k5
+ vmla.f64 k5, ss2, k7
+
+ @ k1 = ss^2 ( ss k4 + k3 ) + ss k2 + k1
+ vmla.f64 k1, ss2, k3
+
+ @ Result = 1.0 + ss + 1/2 ss^2
+ vmla.f64 Result, ss2, k0
+
+ @ Adjust int_n so that it's a double precision value that can be multiplied by Result
+ vadd.i64 expadjustment, int_n, expadjustment
+
+ @ k1 = ss^4 ( ss^2 ( ss k8 + k7 ) + ss k6 + k5 ) + ss^2 ( ss k4 + k3 ) + ss k2 + k1
+ vmla.f64 k1, ss4, k5
+
+ @ Result = 1.0 + ss + 1/2 ss^2 + ss^3 ( ss^4 ( ss^2 ( ss k8 + k7 ) + ss k6 + k5 ) + ss^2 ( ss k4 + k3 ) + ss k2 + k1 )
+ vmla.f64 Result, ss3, k1
+
+ @ multiply by adjustment (sign*(rounding ? sqrt(2) : 1) * 2^int_n)
+ vmul.f64 Result, expadjustment, Result
+
+.LleavePow:
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+ @ return Result (FP)
+ vmov Return_lw, Return_hw, Result
+#endif
+.LleavePowDirect:
+ @ leave directly returning whatever is in Return_lw and Return_hw
+ pop {r4, r5, r6, r7, r8, r9, r10, pc}
+
+.align 6
+.LliteralTable:
+@ Least-sqares tuned constants for 11th order (log2((1+s)/(1-s)):
+.LL4: @ ~3/11
+ .long 0x53a79915, 0x3fd1b108
+.LL3: @ ~1/3
+ .long 0x9ca0567a, 0x3fd554fa
+.LL2: @ ~3/7
+ .long 0x1408e660, 0x3fdb6db7
+.LL1: @ ~3/5
+ .long 0x332D4313, 0x3fe33333
+
+@ Least-squares tuned constants for 10th order exp(s):
+.LE10: @ ~1/3628800
+ .long 0x25c7ba0a, 0x3e92819b
+.LE9: @ ~1/362880
+ .long 0x9499b49c, 0x3ec72294
+.LE8: @ ~1/40320
+ .long 0xabb79d95, 0x3efa019f
+.LE7: @ ~1/5040
+ .long 0x8723aeaa, 0x3f2a019f
+.LE6: @ ~1/720
+ .long 0x16c76a94, 0x3f56c16c
+.LE5: @ ~1/120
+ .long 0x11185da8, 0x3f811111
+.LE4: @ ~1/24
+ .long 0x5555551c, 0x3fa55555
+.LE3: @ ~1/6
+ .long 0x555554db, 0x3fc55555
+
+.LbpA: @ (2^(2/5) - 1)
+ .long 0x4ee54db1, 0x3fd472d1
+
+.LbpB: @ (2^(4/5) - 2^(2/5))
+ .long 0x1c8a36cf, 0x3fdafb62
+
+.Ltwofifths: @
+ .long 0x9999999a, 0x3fd99999
+
+.Ltwooverthreeln2:
+ .long 0xDC3A03FD, 0x3FEEC709
+
+.Lln2: @ ln(2)
+ .long 0xFEFA39EF, 0x3FE62E42
+
+.Ltwoto1o4: @ 2^1/4
+ .long 0x0a31b715, 0x3ff306fe
+END(pow_neon)
diff --git a/libm/arm/s_cos.S b/libm/arm/s_cos.S
new file mode 100644
index 0000000..30a6767
--- /dev/null
+++ b/libm/arm/s_cos.S
@@ -0,0 +1,419 @@
+@ Copyright (c) 2012, The Linux Foundation. All rights reserved.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are
+@ met:
+@ * Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@ * Redistributions in binary form must reproduce the above
+@ copyright notice, this list of conditions and the following
+@ disclaimer in the documentation and/or other materials provided
+@ with the distribution.
+@ * Neither the name of Code Aurora Forum, Inc. nor the names of its
+@ contributors may be used to endorse or promote products derived
+@ from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ Additional notices preserved for attribution purposes only.
+@
+@ ====================================================
+@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+@
+@ Developed at SunSoft, a Sun Microsystems, Inc. business.
+@ Permission to use, copy, modify, and distribute this
+@ software is freely granted, provided that this notice
+@ is preserved.
+@ ====================================================
+@
+@ ====================================================
+@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+@
+@ Developed at SunPro, a Sun Microsystems, Inc. business.
+@ Permission to use, copy, modify, and distribute this
+@ software is freely granted, provided that this notice
+@ is preserved.
+@ ====================================================
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+#define vmov_f64 fconstd
+
+ENTRY(cos)
+ push {r4, r6, r7, lr}
+ vmov d0, r0, r1
+ mov r2, r0
+ mov r3, r1
+ movw r1, #0x21fb
+ movt r1, #0x3fe9
+ mov r4, r3
+ bic r3, r3, #0x80000000
+ sub sp, sp, #48
+ cmp r3, r1
+ bgt .Lxgtpio4
+ cmp r3, #0x3e400000
+ bge .Lxnottiny
+ vcvt.s32.f64 s15, d0
+ vmov r3, s15
+ cmp r3, #0
+ beq .Lreturnone
+.Lxnottiny:
+ vmov.i64 d1, #0
+ bl __kernel_cos
+.Lleave_cos:
+ vmov r0, r1, d0
+.Lleave_cos_direct:
+ add sp, sp, #48
+ pop {r4, r6, r7, pc}
+.Lxgtpio4:
+ movw r2, #0xffff
+ movt r2, #0x7fef
+ cmp r3, r2
+ bgt .LxisNaN
+ movw r0, #0xd97b
+ movt r0, #0x4002
+ cmp r3, r0
+ movw r2, #0x21fb
+ bgt .Lxge3pio4
+ cmp r4, #0
+ movt r2, #0x3ff9
+ ble .Lsmallxisnegative
+ vldr d16, .Lpio2_1
+ cmp r3, r2
+ vsub.f64 d16, d0, d16
+ beq .Lxnearpio2
+ vldr d17, .Lpio2_1t
+.Lfinalizesmallxremainder:
+ vsub.f64 d0, d16, d17
+ vsub.f64 d16, d16, d0
+ vstr d0, [sp, #8]
+ vsub.f64 d1, d16, d17
+ vstr d1, [sp, #16]
+.Lnmod3is1:
+ mov r0, #1
+ bl __kernel_sin
+ vneg.f64 d0, d0
+ b .Lleave_cos
+.Lreturnone:
+ mov r0, #0
+ movw r1, #0x0000
+ movt r1, #0x3ff0
+ vmov_f64 d0, #0x70
+ b .Lleave_cos_direct
+.LxisNaN:
+ vsub.f64 d0, d0, d0
+ b .Lleave_cos
+.Lxge3pio4:
+ movt r2, #0x4139
+ cmp r3, r2
+ bgt .Lxgigantic
+ vmov_f64 d3, #0x60
+ vldr d2, .Linvpio2
+ vldr d18, .Lpio2_1
+ vabs.f64 d16, d0
+ vmla.f64 d3, d16, d2
+ vcvt.s32.f64 s3, d3
+ vcvt.f64.s32 d17, s3
+ vmov r0, s3
+ cmp r0, #31
+ vmls.f64 d16, d17, d18
+ vldr d18, .Lpio2_1t
+ vmul.f64 d18, d17, d18
+ bgt .Lcomputeremainder
+ ldr r2, .Lnpio2_hw_ptr
+ sub lr, r0, #1
+.LPICnpio2_hw0:
+ add r12, pc, r2
+ ldr r1, [r12, lr, lsl #2]
+ cmp r3, r1
+ beq .Lcomputeremainder
+.Lfinishthirditeration:
+ vsub.f64 d0, d16, d18
+ vstr d0, [sp, #8]
+.Lfinishcomputingremainder:
+ vsub.f64 d16, d16, d0
+ cmp r4, #0
+ vsub.f64 d1, d16, d18
+ vstr d1, [sp, #16]
+ blt .Lhandlenegativex
+.Lselectregion:
+ and r0, r0, #3
+ cmp r0, #1
+ beq .Lnmod3is1
+ cmp r0, #2
+ beq .Lnmod3is2
+ cmp r0, #0
+ bne .Lnmod3is0
+ bl __kernel_cos
+ b .Lleave_cos
+.Lxgigantic:
+ asr r2, r3, #20
+ vmov r6, r7, d0
+ sub r2, r2, #1040
+ mov r0, r6
+ sub r2, r2, #6
+ vldr d16, .Ltwo24
+ sub r1, r3, r2, lsl #20
+ vmov d18, r0, r1
+ vcvt.s32.f64 s15, d18
+ add r1, sp, #48
+ mov r3, #3
+ vcvt.f64.s32 d17, s15
+ vsub.f64 d18, d18, d17
+ vstr d17, [sp, #24]
+ vmul.f64 d18, d18, d16
+ vcvt.s32.f64 s15, d18
+ vcvt.f64.s32 d17, s15
+ vsub.f64 d18, d18, d17
+ vstr d17, [sp, #32]
+ vmul.f64 d16, d18, d16
+ fcmpzd d16
+ vstmdb r1!, {d16}
+ vmrs APSR_nzcv, fpscr
+ bne .Lprocessnonzeroterm
+.Lskipzeroterms:
+ vldmdb r1!, {d16}
+ sub r3, r3, #1
+ fcmpzd d16
+ vmrs APSR_nzcv, fpscr
+ beq .Lskipzeroterms
+.Lprocessnonzeroterm:
+ ldr r12, .Ltwo_over_pi_ptr
+ add r0, sp, #24
+ add r1, sp, #8
+.LPICtwo_over_pi0:
+ add lr, pc, r12
+ mov r12, #2
+ str lr, [sp, #4]
+ str r12, [sp]
+ bl __kernel_rem_pio2
+ cmp r4, #0
+ vldr d0, [sp, #8]
+ blt .Lhandlenegativxalso
+ vldr d1, [sp, #16]
+ b .Lselectregion
+.Lxnearpio2:
+ vldr d17, .Lpio2_2
+ vsub.f64 d16, d16, d17
+ vldr d17, .Lpio2_2t
+ b .Lfinalizesmallxremainder
+.Lsmallxisnegative:
+ vldr d1, .Lpio2_1
+ cmp r3, r2
+ vadd.f64 d16, d0, d1
+ beq .Lxnearnegpio2
+ vldr d17, .Lpio2_1t
+.Lfinalizesmallnegxremainder:
+ vadd.f64 d0, d16, d17
+ vsub.f64 d16, d16, d0
+ vstr d0, [sp, #8]
+ vadd.f64 d1, d16, d17
+ vstr d1, [sp, #16]
+.Lnmod3is0:
+ mov r0, #1
+ bl __kernel_sin
+ b .Lleave_cos
+.Lnmod3is2:
+ bl __kernel_cos
+ vneg.f64 d0, d0
+ b .Lleave_cos
+.Lcomputeremainder:
+ vsub.f64 d0, d16, d18
+ asr r1, r3, #20
+ vmov r2, r3, d0
+ ubfx r3, r3, #20, #11
+ rsb r3, r3, r1
+ vstr d0, [sp, #8]
+ cmp r3, #16
+ ble .Lfinishcomputingremainder
+ vldr d18, .Lpio2_2
+ vmul.f64 d20, d17, d18
+ vsub.f64 d19, d16, d20
+ vsub.f64 d16, d16, d19
+ vsub.f64 d18, d16, d20
+ vldr d16, .Lpio2_2t
+ vnmls.f64 d18, d17, d16
+ vsub.f64 d0, d19, d18
+ vmov r2, r3, d0
+ ubfx r3, r3, #20, #11
+ rsb r1, r3, r1
+ vstr d0, [sp, #8]
+ cmp r1, #49
+ ble .Lfinishseconditeration
+ vldr d5, .Lpio2_3
+ vmul.f64 d20, d17, d5
+ vsub.f64 d16, d19, d20
+ vsub.f64 d4, d19, d16
+ vldr d19, .Lpio2_3t
+ vsub.f64 d18, d4, d20
+ vnmls.f64 d18, d17, d19
+ b .Lfinishthirditeration
+.Lhandlenegativex:
+ vneg.f64 d0, d0
+ rsb r0, r0, #0
+ vneg.f64 d1, d1
+ vstr d0, [sp, #8]
+ vstr d1, [sp, #16]
+ b .Lselectregion
+.Lfinishseconditeration:
+ vmov d16, d19
+ b .Lfinishcomputingremainder
+.Lxnearnegpio2:
+ vldr d0, .Lpio2_2
+ vldr d17, .Lpio2_2t
+ vadd.f64 d16, d16, d0
+ b .Lfinalizesmallnegxremainder
+.Lhandlenegativxalso:
+ vldr d6, [sp, #16]
+ vneg.f64 d0, d0
+ rsb r0, r0, #0
+ vneg.f64 d1, d6
+ vstr d0, [sp, #8]
+ vstr d1, [sp, #16]
+ b .Lselectregion
+
+.align 3
+.Lpio2_1:
+ .word 0x54400000, 0x3ff921fb
+.Lpio2_1t:
+ .word 0x1a626331, 0x3dd0b461
+.Linvpio2:
+ .word 0x6dc9c883, 0x3fe45f30
+.Ltwo24:
+ .word 0x00000000, 0x41700000
+.Lpio2_2:
+ .word 0x1a600000, 0x3dd0b461
+.Lpio2_2t:
+ .word 0x2e037073, 0x3ba3198a
+.Lpio2_3:
+ .word 0x2e000000, 0x3ba3198a
+.Lpio2_3t:
+ .word 0x252049c1, 0x397b839a
+.Lnpio2_hw_ptr:
+ .word .Lnpio2_hw-(.LPICnpio2_hw0+8)
+.Ltwo_over_pi_ptr:
+ .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8)
+END(cos)
+
+ .section .rodata.npio2_hw,"a",%progbits
+ .align 2
+.Lnpio2_hw = . + 0
+ .type npio2_hw, %object
+ .size npio2_hw, 128
+npio2_hw:
+ .word 0x3ff921fb
+ .word 0x400921fb
+ .word 0x4012d97c
+ .word 0x401921fb
+ .word 0x401f6a7a
+ .word 0x4022d97c
+ .word 0x4025fdbb
+ .word 0x402921fb
+ .word 0x402c463a
+ .word 0x402f6a7a
+ .word 0x4031475c
+ .word 0x4032d97c
+ .word 0x40346b9c
+ .word 0x4035fdbb
+ .word 0x40378fdb
+ .word 0x403921fb
+ .word 0x403ab41b
+ .word 0x403c463a
+ .word 0x403dd85a
+ .word 0x403f6a7a
+ .word 0x40407e4c
+ .word 0x4041475c
+ .word 0x4042106c
+ .word 0x4042d97c
+ .word 0x4043a28c
+ .word 0x40446b9c
+ .word 0x404534ac
+ .word 0x4045fdbb
+ .word 0x4046c6cb
+ .word 0x40478fdb
+ .word 0x404858eb
+ .word 0x404921fb
+
+ .section .rodata.two_over_pi,"a",%progbits
+ .align 2
+.Ltwo_over_pi = . + 0
+ .type two_over_pi, %object
+ .size two_over_pi, 264
+two_over_pi:
+ .word 0x00a2f983
+ .word 0x006e4e44
+ .word 0x001529fc
+ .word 0x002757d1
+ .word 0x00f534dd
+ .word 0x00c0db62
+ .word 0x0095993c
+ .word 0x00439041
+ .word 0x00fe5163
+ .word 0x00abdebb
+ .word 0x00c561b7
+ .word 0x00246e3a
+ .word 0x00424dd2
+ .word 0x00e00649
+ .word 0x002eea09
+ .word 0x00d1921c
+ .word 0x00fe1deb
+ .word 0x001cb129
+ .word 0x00a73ee8
+ .word 0x008235f5
+ .word 0x002ebb44
+ .word 0x0084e99c
+ .word 0x007026b4
+ .word 0x005f7e41
+ .word 0x003991d6
+ .word 0x00398353
+ .word 0x0039f49c
+ .word 0x00845f8b
+ .word 0x00bdf928
+ .word 0x003b1ff8
+ .word 0x0097ffde
+ .word 0x0005980f
+ .word 0x00ef2f11
+ .word 0x008b5a0a
+ .word 0x006d1f6d
+ .word 0x00367ecf
+ .word 0x0027cb09
+ .word 0x00b74f46
+ .word 0x003f669e
+ .word 0x005fea2d
+ .word 0x007527ba
+ .word 0x00c7ebe5
+ .word 0x00f17b3d
+ .word 0x000739f7
+ .word 0x008a5292
+ .word 0x00ea6bfb
+ .word 0x005fb11f
+ .word 0x008d5d08
+ .word 0x00560330
+ .word 0x0046fc7b
+ .word 0x006babf0
+ .word 0x00cfbc20
+ .word 0x009af436
+ .word 0x001da9e3
+ .word 0x0091615e
+ .word 0x00e61b08
+ .word 0x00659985
+ .word 0x005f14a0
+ .word 0x0068408d
+ .word 0x00ffd880
+ .word 0x004d7327
+ .word 0x00310606
+ .word 0x001556ca
+ .word 0x0073a8c9
+ .word 0x0060e27b
+ .word 0x00c08c6b
diff --git a/libm/arm/s_sin.S b/libm/arm/s_sin.S
new file mode 100644
index 0000000..9c3366c
--- /dev/null
+++ b/libm/arm/s_sin.S
@@ -0,0 +1,414 @@
+@ Copyright (c) 2012, The Linux Foundation. All rights reserved.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are
+@ met:
+@ * Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@ * Redistributions in binary form must reproduce the above
+@ copyright notice, this list of conditions and the following
+@ disclaimer in the documentation and/or other materials provided
+@ with the distribution.
+@ * Neither the name of Code Aurora Forum, Inc. nor the names of its
+@ contributors may be used to endorse or promote products derived
+@ from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+@ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+@ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ Additional notices preserved for attribution purposes only.
+@
+@ ====================================================
+@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+@
+@ Developed at SunSoft, a Sun Microsystems, Inc. business.
+@ Permission to use, copy, modify, and distribute this
+@ software is freely granted, provided that this notice
+@ is preserved.
+@ ====================================================
+@
+@ ====================================================
+@ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+@
+@ Developed at SunPro, a Sun Microsystems, Inc. business.
+@ Permission to use, copy, modify, and distribute this
+@ software is freely granted, provided that this notice
+@ is preserved.
+@ ====================================================
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+#define vmov_f64 fconstd
+
+ENTRY(sin)
+ push {r4, r6, r7, lr}
+ vmov d0, r0, r1
+ mov r2, r0
+ mov r3, r1
+ movw r1, #0x21fb
+ movt r1, #0x3fe9
+ mov r4, r3
+ bic r3, r3, #0x80000000
+ sub sp, sp, #48
+ cmp r3, r1
+ bgt .Lxgtpio4
+ cmp r3, #0x3e400000
+ bge .Lxnottiny
+ vcvt.s32.f64 s15, d0
+ vmov r3, s15
+ cmp r3, #0
+ bne .Lxnottiny
+.Lleave_sin:
+ vmov r0, r1, d0
+ add sp, sp, #48
+ pop {r4, r6, r7, pc}
+.Lxgtpio4:
+ movw r2, #0xffff
+ movt r2, #0x7fef
+ cmp r3, r2
+ bgt .LxisNaN
+ movw r0, #0xd97b
+ movt r0, #0x4002
+ cmp r3, r0
+ movw r2, #0x21fb
+ bgt .Lxge3pio4
+ cmp r4, #0
+ movt r2, #0x3ff9
+ ble .Lsmallxisnegative
+ vldr d16, .Lpio2_1
+ cmp r3, r2
+ vsub.f64 d16, d0, d16
+ beq .Lxnearpio2
+ vldr d17, .Lpio2_1t
+.Lfinalizesmallxremainder:
+ vsub.f64 d0, d16, d17
+ vsub.f64 d16, d16, d0
+ vstr d0, [sp, #8]
+ vsub.f64 d1, d16, d17
+ vstr d1, [sp, #16]
+.Lnmod3is1:
+ bl __kernel_cos
+ b .Lleave_sin
+.Lxnottiny:
+ vmov.i64 d1, #0
+ mov r0, #0
+ bl __kernel_sin
+ b .Lleave_sin
+.LxisNaN:
+ vsub.f64 d0, d0, d0
+ b .Lleave_sin
+.Lxge3pio4:
+ movt r2, #0x4139
+ cmp r3, r2
+ bgt .Lxgigantic
+ vmov_f64 d3, #0x60
+ vldr d2, .Linvpio2
+ vldr d18, .Lpio2_1
+ vabs.f64 d16, d0
+ vmla.f64 d3, d16, d2
+ vcvt.s32.f64 s3, d3
+ vcvt.f64.s32 d17, s3
+ vmov r0, s3
+ cmp r0, #31
+ vmls.f64 d16, d17, d18
+ vldr d18, .Lpio2_1t
+ vmul.f64 d18, d17, d18
+ bgt .Lcomputeremainder
+ ldr r2, .Lnpio2_hw_ptr
+ sub lr, r0, #1
+.LPICnpio2_hw0:
+ add r12, pc, r2
+ ldr r1, [r12, lr, lsl #2]
+ cmp r3, r1
+ beq .Lcomputeremainder
+.Lfinishthirditeration:
+ vsub.f64 d0, d16, d18
+ vstr d0, [sp, #8]
+.Lfinishcomputingremainder:
+ vsub.f64 d16, d16, d0
+ cmp r4, #0
+ vsub.f64 d1, d16, d18
+ vstr d1, [sp, #16]
+ blt .Lhandlenegativex
+.Lselectregion:
+ and r0, r0, #3
+ cmp r0, #1
+ beq .Lnmod3is1
+ cmp r0, #2
+ beq .Lnmod3is2
+ cmp r0, #0
+ bne .Lnmod3is0
+ mov r0, #1
+ bl __kernel_sin
+ b .Lleave_sin
+.Lxgigantic:
+ asr r2, r3, #20
+ vmov r6, r7, d0
+ sub r2, r2, #1040
+ mov r0, r6
+ sub r2, r2, #6
+ vldr d16, .Ltwo24
+ sub r1, r3, r2, lsl #20
+ vmov d18, r0, r1
+ vcvt.s32.f64 s15, d18
+ add r1, sp, #48
+ mov r3, #3
+ vcvt.f64.s32 d17, s15
+ vsub.f64 d18, d18, d17
+ vstr d17, [sp, #24]
+ vmul.f64 d18, d18, d16
+ vcvt.s32.f64 s15, d18
+ vcvt.f64.s32 d17, s15
+ vsub.f64 d18, d18, d17
+ vstr d17, [sp, #32]
+ vmul.f64 d16, d18, d16
+ fcmpzd d16
+ vstmdb r1!, {d16}
+ vmrs APSR_nzcv, fpscr
+ bne .Lprocessnonzeroterm
+.Lskipzeroterms:
+ vldmdb r1!, {d16}
+ sub r3, r3, #1
+ fcmpzd d16
+ vmrs APSR_nzcv, fpscr
+ beq .Lskipzeroterms
+.Lprocessnonzeroterm:
+ ldr r12, .Ltwo_over_pi_ptr
+ add r0, sp, #24
+ add r1, sp, #8
+.LPICtwo_over_pi0:
+ add lr, pc, r12
+ mov r12, #2
+ str lr, [sp, #4]
+ str r12, [sp]
+ bl __kernel_rem_pio2
+ cmp r4, #0
+ vldr d0, [sp, #8]
+ blt .Lhandlenegativexalso
+ vldr d1, [sp, #16]
+ b .Lselectregion
+.Lxnearpio2:
+ vldr d17, .Lpio2_2
+ vsub.f64 d16, d16, d17
+ vldr d17, .Lpio2_2t
+ b .Lfinalizesmallxremainder
+.Lsmallxisnegative:
+ vldr d1, .Lpio2_1
+ cmp r3, r2
+ vadd.f64 d16, d0, d1
+ beq .Lxnearnegpio2
+ vldr d17, .Lpio2_1t
+.Lfinalizesmallnegxremainder:
+ vadd.f64 d0, d16, d17
+ vsub.f64 d16, d16, d0
+ vstr d0, [sp, #8]
+ vadd.f64 d1, d16, d17
+ vstr d1, [sp, #16]
+.Lnmod3is0:
+ bl __kernel_cos
+ vneg.f64 d0, d0
+ b .Lleave_sin
+.Lnmod3is2:
+ mov r0, #1
+ bl __kernel_sin
+ vneg.f64 d0, d0
+ b .Lleave_sin
+.Lcomputeremainder:
+ vsub.f64 d0, d16, d18
+ asr r1, r3, #20
+ vmov r2, r3, d0
+ ubfx r3, r3, #20, #11
+ rsb r3, r3, r1
+ vstr d0, [sp, #8]
+ cmp r3, #16
+ ble .Lfinishcomputingremainder
+ vldr d18, .Lpio2_2
+ vmul.f64 d20, d17, d18
+ vsub.f64 d19, d16, d20
+ vsub.f64 d16, d16, d19
+ vsub.f64 d18, d16, d20
+ vldr d16, .Lpio2_2t
+ vnmls.f64 d18, d17, d16
+ vsub.f64 d0, d19, d18
+ vmov r2, r3, d0
+ ubfx r3, r3, #20, #11
+ rsb r1, r3, r1
+ vstr d0, [sp, #8]
+ cmp r1, #49
+ ble .Lfinishseconditeration
+ vldr d5, .Lpio2_3
+ vmul.f64 d20, d17, d5
+ vsub.f64 d16, d19, d20
+ vsub.f64 d4, d19, d16
+ vldr d19, .Lpio2_3t
+ vsub.f64 d18, d4, d20
+ vnmls.f64 d18, d17, d19
+ b .Lfinishthirditeration
+.Lhandlenegativex:
+ vneg.f64 d0, d0
+ rsb r0, r0, #0
+ vneg.f64 d1, d1
+ vstr d0, [sp, #8]
+ vstr d1, [sp, #16]
+ b .Lselectregion
+.Lfinishseconditeration:
+ vmov d16, d19
+ b .Lfinishcomputingremainder
+.Lxnearnegpio2:
+ vldr d0, .Lpio2_2
+ vldr d17, .Lpio2_2t
+ vadd.f64 d16, d16, d0
+ b .Lfinalizesmallnegxremainder
+.Lhandlenegativexalso:
+ vldr d6, [sp, #16]
+ vneg.f64 d0, d0
+ rsb r0, r0, #0
+ vneg.f64 d1, d6
+ vstr d0, [sp, #8]
+ vstr d1, [sp, #16]
+ b .Lselectregion
+
+.align 3
+.Lpio2_1:
+ .word 0x54400000, 0x3ff921fb
+.Lpio2_1t:
+ .word 0x1a626331, 0x3dd0b461
+.Linvpio2:
+ .word 0x6dc9c883, 0x3fe45f30
+.Ltwo24:
+ .word 0x00000000, 0x41700000
+.Lpio2_2:
+ .word 0x1a600000, 0x3dd0b461
+.Lpio2_2t:
+ .word 0x2e037073, 0x3ba3198a
+.Lpio2_3:
+ .word 0x2e000000, 0x3ba3198a
+.Lpio2_3t:
+ .word 0x252049c1, 0x397b839a
+.Lnpio2_hw_ptr:
+ .word .Lnpio2_hw-(.LPICnpio2_hw0+8)
+.Ltwo_over_pi_ptr:
+ .word .Ltwo_over_pi-(.LPICtwo_over_pi0+8)
+END(sin)
+
+ .section .rodata.npio2_hw,"a",%progbits
+ .align 2
+.Lnpio2_hw = . + 0
+ .type npio2_hw, %object
+ .size npio2_hw, 128
+npio2_hw:
+ .word 0x3ff921fb
+ .word 0x400921fb
+ .word 0x4012d97c
+ .word 0x401921fb
+ .word 0x401f6a7a
+ .word 0x4022d97c
+ .word 0x4025fdbb
+ .word 0x402921fb
+ .word 0x402c463a
+ .word 0x402f6a7a
+ .word 0x4031475c
+ .word 0x4032d97c
+ .word 0x40346b9c
+ .word 0x4035fdbb
+ .word 0x40378fdb
+ .word 0x403921fb
+ .word 0x403ab41b
+ .word 0x403c463a
+ .word 0x403dd85a
+ .word 0x403f6a7a
+ .word 0x40407e4c
+ .word 0x4041475c
+ .word 0x4042106c
+ .word 0x4042d97c
+ .word 0x4043a28c
+ .word 0x40446b9c
+ .word 0x404534ac
+ .word 0x4045fdbb
+ .word 0x4046c6cb
+ .word 0x40478fdb
+ .word 0x404858eb
+ .word 0x404921fb
+
+ .section .rodata.two_over_pi,"a",%progbits
+ .align 2
+.Ltwo_over_pi = . + 0
+ .type two_over_pi, %object
+ .size two_over_pi, 264
+two_over_pi:
+ .word 0x00a2f983
+ .word 0x006e4e44
+ .word 0x001529fc
+ .word 0x002757d1
+ .word 0x00f534dd
+ .word 0x00c0db62
+ .word 0x0095993c
+ .word 0x00439041
+ .word 0x00fe5163
+ .word 0x00abdebb
+ .word 0x00c561b7
+ .word 0x00246e3a
+ .word 0x00424dd2
+ .word 0x00e00649
+ .word 0x002eea09
+ .word 0x00d1921c
+ .word 0x00fe1deb
+ .word 0x001cb129
+ .word 0x00a73ee8
+ .word 0x008235f5
+ .word 0x002ebb44
+ .word 0x0084e99c
+ .word 0x007026b4
+ .word 0x005f7e41
+ .word 0x003991d6
+ .word 0x00398353
+ .word 0x0039f49c
+ .word 0x00845f8b
+ .word 0x00bdf928
+ .word 0x003b1ff8
+ .word 0x0097ffde
+ .word 0x0005980f
+ .word 0x00ef2f11
+ .word 0x008b5a0a
+ .word 0x006d1f6d
+ .word 0x00367ecf
+ .word 0x0027cb09
+ .word 0x00b74f46
+ .word 0x003f669e
+ .word 0x005fea2d
+ .word 0x007527ba
+ .word 0x00c7ebe5
+ .word 0x00f17b3d
+ .word 0x000739f7
+ .word 0x008a5292
+ .word 0x00ea6bfb
+ .word 0x005fb11f
+ .word 0x008d5d08
+ .word 0x00560330
+ .word 0x0046fc7b
+ .word 0x006babf0
+ .word 0x00cfbc20
+ .word 0x009af436
+ .word 0x001da9e3
+ .word 0x0091615e
+ .word 0x00e61b08
+ .word 0x00659985
+ .word 0x005f14a0
+ .word 0x0068408d
+ .word 0x00ffd880
+ .word 0x004d7327
+ .word 0x00310606
+ .word 0x001556ca
+ .word 0x0073a8c9
+ .word 0x0060e27b
+ .word 0x00c08c6b
diff --git a/libm/src/e_pow.c b/libm/src/e_pow.c
index d213132..bd82f30 100644
--- a/libm/src/e_pow.c
+++ b/libm/src/e_pow.c
@@ -61,6 +61,14 @@ static char rcsid[] = "$FreeBSD: src/lib/msun/src/e_pow.c,v 1.11 2005/02/04 18:2
#include "math.h"
#include "math_private.h"
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION) || defined(SCORPION_NEON_OPTIMIZATION)
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+double pow_neon(double x, double y);
+#else
+double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp")));
+#endif
+#endif
+
static const double
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
@@ -108,12 +116,32 @@ __ieee754_pow(double x, double y)
ix = hx&0x7fffffff; iy = hy&0x7fffffff;
/* y==zero: x**0 = 1 */
- if((iy|ly)==0) return one;
- /* +-NaN return x+y */
- if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
- iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
- return x+y;
+ if (ly == 0) {
+ if (hy == ly) {
+ /* y==0.0, x**0 = 1 */
+ return one;
+ }
+ else if (iy > 0x7ff00000) {
+ /* y is NaN, return x+y (NaN) */
+ return x+y;
+ }
+ }
+ else if (iy >= 0x7ff00000) {
+ /* y is NaN, return x+y (NaN) */
+ return x+y;
+ }
+
+ if (lx == 0) {
+ if (ix > 0x7ff00000) {
+ /* x is NaN, return x+y (NaN) */
+ return x+y;
+ }
+ }
+ else if (ix >= 0x7ff00000) {
+ /* x is NaN, return x+y (NaN) */
+ return x+y;
+ }
/* determine if y is an odd int when x < 0
* yisint = 0 ... y is not an integer
@@ -201,6 +229,14 @@ __ieee754_pow(double x, double y)
t1 = u+v;
SET_LOW_WORD(t1,0);
t2 = v-(t1-u);
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION) || defined(SCORPION_NEON_OPTIMIZATION)
+ } else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) {
+#if defined(KRAIT_NO_AAPCS_VFP_MODE)
+ return pow_neon(x,y);
+#else
+ return pow_neon(x,y,lx,hx);
+#endif
+#endif
} else {
double ss,s2,s_h,s_l,t_h,t_l;
n = 0;
diff --git a/libm/src/k_cos.c b/libm/src/k_cos.c
index 00916d7..b8cdf8f 100644
--- a/libm/src/k_cos.c
+++ b/libm/src/k_cos.c
@@ -69,6 +69,17 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double
__kernel_cos(double x, double y)
{
+#if defined(KRAIT_NEON_OPTIMIZATION)
+ double hz,z,zz,r,w,k;
+
+ z = x*x;
+ zz = z*z;
+ k = x*y;
+ hz = (float)0.5*z;
+ r = z*(z*(C1+z*(C2+z*((C3+z*C4)+zz*(C5+z*C6)))));
+ w = one-hz;
+ return w + (((one-w)-hz) + (r-k));
+#else
double hz,z,r,w;
z = x*x;
@@ -76,4 +87,5 @@ __kernel_cos(double x, double y)
hz = (float)0.5*z;
w = one-hz;
return w + (((one-w)-hz) + (z*r-x*y));
+#endif
}
diff --git a/libm/src/k_sin.c b/libm/src/k_sin.c
index ae06a9d..ee641d4 100644
--- a/libm/src/k_sin.c
+++ b/libm/src/k_sin.c
@@ -60,6 +60,16 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double
__kernel_sin(double x, double y, int iy)
{
+#if defined(KRAIT_NEON_OPTIMIZATION)
+ double z,zz,r,v;
+
+ z = x*x;
+ zz = z*z;
+ v = z*x;
+ r = S2+z*((S3+z*S4)+zz*(S5+z*S6));
+ if(iy==0) return x+v*(S1+z*r);
+ else return x-((z*(half*y-v*r)-y)-v*S1);
+#else
double z,r,v;
z = x*x;
@@ -67,4 +77,5 @@ __kernel_sin(double x, double y, int iy)
r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
if(iy==0) return x+v*(S1+z*r);
else return x-((z*(half*y-v*r)-y)-v*S1);
+#endif
}
diff --git a/libm/src/math_private.h b/libm/src/math_private.h
index 5f6e088..7cda2e9 100644
--- a/libm/src/math_private.h
+++ b/libm/src/math_private.h
@@ -257,11 +257,19 @@ cpackl(long double x, long double y)
#define __ieee754_ldexpf ldexpf
/* fdlibm kernel function */
+#if defined(KRAIT_NEON_OPTIMIZATION)
+int __ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp")));
+double __kernel_sin(double,double,int) __attribute__((pcs("aapcs-vfp")));
+double __kernel_cos(double,double) __attribute__((pcs("aapcs-vfp")));
+double __kernel_tan(double,double,int) __attribute__((pcs("aapcs-vfp")));
+int __kernel_rem_pio2(double*,double*,int,int,int,const int*) __attribute__((pcs("aapcs-vfp")));
+#else
int __ieee754_rem_pio2(double,double*);
double __kernel_sin(double,double,int);
double __kernel_cos(double,double);
double __kernel_tan(double,double,int);
int __kernel_rem_pio2(double*,double*,int,int,int,const int*);
+#endif
/* float versions of fdlibm kernel functions */
int __ieee754_rem_pio2f(float,float*);
diff --git a/linker/Android.mk b/linker/Android.mk
index e8c81db..19f75c8 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -43,6 +43,9 @@ endif
ifeq ($(TARGET_ARCH),mips)
LOCAL_CFLAGS += -DANDROID_MIPS_LINKER
endif
+ifeq ($(TARGET_HAVE_TEGRA_ERRATA_657451),true)
+ LOCAL_CFLAGS += -DHAVE_TEGRA_ERRATA_657451
+endif
LOCAL_MODULE:= linker
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 46d1335..2362099 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -641,33 +641,35 @@ static int open_library(const char *name)
return -1;
}
-// Returns 'true' if the library is prelinked or on failure so we error out
-// either way. We no longer support prelinking.
-static bool is_prelinked(int fd, const char* name)
+typedef struct {
+ long mmap_addr;
+ char tag[4]; /* 'P', 'R', 'E', ' ' */
+} prelink_info_t;
+
+/* Returns the requested base address if the library is prelinked,
+ * and 0 otherwise. */
+static unsigned long
+is_prelinked(int fd, const char *name)
{
- struct prelink_info_t {
- long mmap_addr;
- char tag[4]; // "PRE ".
- };
-
off_t sz = lseek(fd, -sizeof(prelink_info_t), SEEK_END);
if (sz < 0) {
- DL_ERR("lseek failed: %s", strerror(errno));
- return true;
+ DL_ERR("lseek() failed!");
+ return 0;
}
prelink_info_t info;
int rc = TEMP_FAILURE_RETRY(read(fd, &info, sizeof(info)));
if (rc != sizeof(info)) {
- DL_ERR("could not read prelink_info_t structure for \"%s\":", name, strerror(errno));
- return true;
+ WARN("Could not read prelink_info_t structure for `%s`\n", name);
+ return 0;
}
- if (memcmp(info.tag, "PRE ", 4) == 0) {
- DL_ERR("prelinked libraries no longer supported: %s", name);
- return true;
+ if (memcmp(info.tag, "PRE ", 4)) {
+ WARN("`%s` is not a prelinked library\n", name);
+ return 0;
}
- return false;
+
+ return (unsigned long)info.mmap_addr;
}
/* verify_elf_header
@@ -781,10 +783,21 @@ static soinfo* load_library(const char* name)
return NULL;
}
- // We no longer support pre-linked libraries.
- if (is_prelinked(fd.fd, name)) {
+ unsigned req_base = (unsigned) is_prelinked(fd.fd, name);
+ if (req_base == (unsigned)-1) {
+ DL_ERR("%5d can't read end of library: %s: %s", pid, name,
+ strerror(errno));
return NULL;
}
+ if (req_base != 0) {
+ TRACE("[ %5d - Prelinked library '%s' requesting base @ 0x%08x ]\n",
+ pid, name, req_base);
+ } else {
+ TRACE("[ %5d - Non-prelinked library '%s' found. ]\n", pid, name);
+ }
+
+ TRACE("[ %5d - '%s' (%s) wants base=0x%08x sz=0x%08x ]\n", pid, name,
+ (req_base ? "prelinked" : "not pre-linked"), req_base, ext_sz);
// Reserve address space for all loadable segments.
void* load_start = NULL;
@@ -792,6 +805,7 @@ static soinfo* load_library(const char* name)
Elf32_Addr load_bias = 0;
ret = phdr_table_reserve_memory(phdr_table,
phdr_count,
+ req_base,
&load_start,
&load_size,
&load_bias);
diff --git a/linker/linker_phdr.c b/linker/linker_phdr.c
index 250ca20..36f848b 100644
--- a/linker/linker_phdr.c
+++ b/linker/linker_phdr.c
@@ -218,6 +218,8 @@ Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
* Input:
* phdr_table -> program header table
* phdr_count -> number of entries in the tables
+ * required_base -> for prelinked libraries, mandatory load address
+ * of the first loadable segment. 0 otherwise.
* Output:
* load_start -> first page of reserved address space range
* load_size -> size in bytes of reserved address space range
@@ -229,18 +231,22 @@ Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
int
phdr_table_reserve_memory(const Elf32_Phdr* phdr_table,
size_t phdr_count,
+ Elf32_Addr required_base,
void** load_start,
Elf32_Addr* load_size,
Elf32_Addr* load_bias)
{
Elf32_Addr size = phdr_table_get_load_size(phdr_table, phdr_count);
+
if (size == 0) {
errno = EINVAL;
return -1;
}
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
- void* start = mmap(NULL, size, PROT_NONE, mmap_flags, -1, 0);
+ if (required_base != 0)
+ mmap_flags |= MAP_FIXED;
+ void* start = mmap((void*)required_base, size, PROT_NONE, mmap_flags, -1, 0);
if (start == MAP_FAILED) {
return -1;
}
diff --git a/linker/linker_phdr.h b/linker/linker_phdr.h
index a759262..19e281b 100644
--- a/linker/linker_phdr.h
+++ b/linker/linker_phdr.h
@@ -61,6 +61,7 @@ phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
int
phdr_table_reserve_memory(const Elf32_Phdr* phdr_table,
size_t phdr_count,
+ Elf32_Addr required_base,
void** load_start,
Elf32_Addr* load_size,
Elf32_Addr* load_bias);