-rw-r--r--  benchmarks/math_benchmark.cpp | 150
-rw-r--r--  libc/Android.mk | 7
-rw-r--r--  libc/arch-arm/arm.mk | 3
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcat_chk.S | 192
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S | 212
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S | 156
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S | 173
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/memcpy.S | 79
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/memcpy_base.S | 42
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/memcpy_common.S | 103
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/strcat.S | 311
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/string_copy.S | 21
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/strlen.S | 60
-rw-r--r--  libc/arch-arm/cortex-a15/cortex-a15.mk | 1
-rw-r--r--  libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk | 22
-rw-r--r--  libc/arch-arm/cortex-a53/bionic/__strcat_chk.S | 32
-rw-r--r--  libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S | 32
-rw-r--r--  libc/arch-arm/cortex-a53/bionic/memcpy.S | 32
-rw-r--r--  libc/arch-arm/cortex-a53/bionic/memcpy_base.S | 143
-rw-r--r--  libc/arch-arm/cortex-a53/cortex-a53.mk | 22
-rw-r--r--  libc/arch-arm/cortex-a7/bionic/memset.S | 180
-rw-r--r--  libc/arch-arm/cortex-a7/cortex-a7.mk | 20
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/memcpy_base.S | 3
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/memset.S | 33
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/strcat.S | 261
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/string_copy.S | 21
-rw-r--r--  libc/arch-arm/cortex-a9/cortex-a9.mk | 1
-rw-r--r--  libc/arch-arm/denver/denver.mk | 1
-rw-r--r--  libc/arch-arm/generic/bionic/memchr.S | 155
-rw-r--r--  libc/arch-arm/generic/bionic/memcmp.S | 3
-rw-r--r--  libc/arch-arm/generic/bionic/memcpy.S | 6
-rw-r--r--  libc/arch-arm/generic/bionic/memset.S | 6
-rw-r--r--  libc/arch-arm/generic/generic.mk | 1
-rw-r--r--  libc/arch-arm/krait/bionic/__strcat_chk.S | 19
-rw-r--r--  libc/arch-arm/krait/bionic/__strcpy_chk.S | 15
-rw-r--r--  libc/arch-arm/krait/bionic/memcpy.S | 17
-rw-r--r--  libc/arch-arm/krait/bionic/memcpy_base.S | 314
-rw-r--r--  libc/arch-arm/krait/bionic/memmove.S | 219
-rw-r--r--  libc/arch-arm/krait/bionic/memset.S | 20
-rw-r--r--  libc/arch-arm/krait/krait.mk | 16
-rw-r--r--  libc/arch-arm/scorpion/scorpion.mk | 18
-rw-r--r--  libc/arch-arm64/arm64.mk | 1
-rw-r--r--  libc/arch-arm64/denver64/bionic/memmove.S | 329
-rw-r--r--  libc/arch-arm64/denver64/denver64.mk | 3
-rw-r--r--  libc/arch-arm64/generic/bionic/memcpy_base.S | 312
-rw-r--r--  libc/arch-arm64/generic/bionic/memmove.S | 404
-rw-r--r--  libc/arch-arm64/generic/bionic/memset.S | 395
-rw-r--r--  libc/arch-arm64/generic/bionic/strlen.S | 243
-rw-r--r--  libc/arch-arm64/generic/bionic/strrchr.S | 171
-rw-r--r--  libc/arch-arm64/generic/generic.mk | 1
-rw-r--r--  libc/arch-arm64/kryo/bionic/memcpy.S | 65
-rw-r--r--  libc/arch-arm64/kryo/bionic/memcpy_base.S | 244
-rw-r--r--  libc/arch-arm64/kryo/bionic/memmove.S | 329
-rw-r--r--  libc/arch-arm64/kryo/kryo.mk | 15
-rw-r--r--  libc/arch-x86_64/string/sse2-memmove-slm.S | 3
-rw-r--r--  libc/bionic/legacy_32_bit_support.cpp | 19
-rw-r--r--  libc/bionic/libc_init_common.cpp | 1
-rw-r--r--  libc/bionic/malloc_debug_check.cpp | 147
-rw-r--r--  libc/bionic/malloc_debug_common.cpp | 6
-rw-r--r--  libc/bionic/mmap.cpp | 7
-rw-r--r--  libc/dns/net/getaddrinfo.c | 8
-rw-r--r--  libc/include/libgen.h | 17
-rw-r--r--  libc/include/paths.h | 1
-rw-r--r--  libc/include/regex.h | 5
-rw-r--r--  libc/include/string.h | 4
-rw-r--r--  libc/include/sys/resource.h | 3
-rw-r--r--  libc/kernel/uapi/linux/android_alarm.h | 13
-rw-r--r--  libc/kernel/uapi/linux/if_packet.h | 58
-rw-r--r--  libc/kernel/uapi/linux/time.h | 3
-rw-r--r--  libc/upstream-netbsd/lib/libc/gen/popen.c | 2
-rw-r--r--  libm/Android.mk | 14
-rw-r--r--  libm/arm/e_sqrtf.S | 39
-rw-r--r--  libm/arm/floor.S (renamed from libm/arm/s_floor.S) | 0
-rw-r--r--  libm/arm/sqrt.S (renamed from libm/arm/e_sqrt.S) | 7
-rw-r--r--  libm/arm64/fenv.c | 19
-rw-r--r--  libm/fabs.cpp | 46
-rw-r--r--  libm/fake_long_double.c | 1
-rw-r--r--  libm/include/math.h | 151
-rw-r--r--  libm/upstream-freebsd/lib/msun/ld128/k_expl.h | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/catrig.c | 66
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/catrigf.c | 66
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_j0.c | 32
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_j0f.c | 47
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_j1.c | 30
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_j1f.c | 45
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_jn.c | 12
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/e_jnf.c | 13
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/k_exp.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/k_expf.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/math_private.h | 20
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_ccosh.c | 61
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_ccoshf.c | 45
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_cexp.c | 14
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_cexpf.c | 14
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_conj.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_conjf.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_conjl.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_cproj.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_cprojf.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_cprojl.c | 4
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_csinh.c | 75
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_csinhf.c | 47
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_csqrt.c | 16
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_csqrtf.c | 16
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_csqrtl.c | 16
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_ctanh.c | 45
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_ctanhf.c | 19
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_exp2.c | 6
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_fabs.c | 31
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_fabsf.c | 33
-rw-r--r--  libm/upstream-freebsd/lib/msun/src/s_scalbln.c | 42
-rw-r--r--  linker/linker.cpp | 79
-rw-r--r--  tests/Android.mk | 1
-rw-r--r--  tests/buffer_tests.cpp | 16
-rw-r--r--  tests/libgen_basename_test.cpp | 89
-rw-r--r--  tests/libgen_test.cpp | 24
-rw-r--r--  tests/regex_test.cpp | 10
-rw-r--r--  tests/string_test.cpp | 20
-rw-r--r--  tests/sys_resource_test.cpp | 25
119 files changed, 4964 insertions(+), 2355 deletions(-)
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp
index 4de28d1..ed5b56c 100644
--- a/benchmarks/math_benchmark.cpp
+++ b/benchmarks/math_benchmark.cpp
@@ -65,6 +65,50 @@ void BM_math_logb::Run(int iters) {
StopBenchmarkTiming();
}
+BENCHMARK_WITH_ARG(BM_math_isfinite_macro, double)->AT_COMMON_VALS;
+void BM_math_isfinite_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += isfinite(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+#if defined(__BIONIC__)
+#define test_isfinite __isfinite
+#else
+#define test_isfinite __finite
+#endif
+BENCHMARK_WITH_ARG(BM_math_isfinite, double)->AT_COMMON_VALS;
+void BM_math_isfinite::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += test_isfinite(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_isinf_macro, double)->AT_COMMON_VALS;
+void BM_math_isinf_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += isinf(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
BENCHMARK_WITH_ARG(BM_math_isinf, double)->AT_COMMON_VALS;
void BM_math_isinf::Run(int iters, double value) {
StartBenchmarkTiming();
@@ -78,6 +122,60 @@ void BM_math_isinf::Run(int iters, double value) {
StopBenchmarkTiming();
}
+BENCHMARK_WITH_ARG(BM_math_isnan_macro, double)->AT_COMMON_VALS;
+void BM_math_isnan_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += isnan(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_isnan, double)->AT_COMMON_VALS;
+void BM_math_isnan::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += (isnan)(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_isnormal_macro, double)->AT_COMMON_VALS;
+void BM_math_isnormal_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += isnormal(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+#if defined(__BIONIC__)
+BENCHMARK_WITH_ARG(BM_math_isnormal, double)->AT_COMMON_VALS;
+void BM_math_isnormal::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += (__isnormal)(v);
+ }
+
+ StopBenchmarkTiming();
+}
+#endif
+
BENCHMARK_NO_ARG(BM_math_sin_fast);
void BM_math_sin_fast::Run(int iters) {
StartBenchmarkTiming();
@@ -134,3 +232,55 @@ void BM_math_fpclassify::Run(int iters, double value) {
StopBenchmarkTiming();
}
+
+BENCHMARK_WITH_ARG(BM_math_signbit_macro, double)->AT_COMMON_VALS;
+void BM_math_signbit_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += signbit(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_signbit, double)->AT_COMMON_VALS;
+void BM_math_signbit::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += (__signbit)(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_fabs_macro, double)->AT_COMMON_VALS;
+void BM_math_fabs_macro::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += fabs(v);
+ }
+
+ StopBenchmarkTiming();
+}
+
+BENCHMARK_WITH_ARG(BM_math_fabs, double)->AT_COMMON_VALS;
+void BM_math_fabs::Run(int iters, double value) {
+ StartBenchmarkTiming();
+
+ d = 0.0;
+ v = value;
+ for (int i = 0; i < iters; ++i) {
+ d += (fabs)(v);
+ }
+
+ StopBenchmarkTiming();
+}
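
A note on the paired benchmarks above (annotation, not part of the patch): isnan, isinf, isfinite, isnormal, and signbit are function-like macros in <math.h>, so the _macro variants measure the inline expansion, while parenthesized calls such as (isnan)(v) suppress macro expansion and measure the out-of-line libc function instead. A minimal standalone C sketch of the distinction:

#include <math.h>
#include <stdio.h>

int main(void) {
  double v = nan("");
  int via_macro = isnan(v);      /* function-like macro: expands to inline bit tests */
  int via_function = (isnan)(v); /* parenthesized name defeats the macro,
                                    so this calls the libc function */
  printf("%d %d\n", via_macro != 0, via_function != 0);
  return 0;
}

The fabs pair uses the same idiom: (fabs)(v) cannot be a macro invocation, so it bypasses any macro definition of fabs.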
diff --git a/libc/Android.mk b/libc/Android.mk
index f0c5e9f..f7f2adc 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -611,6 +611,10 @@ ifneq ($(BOARD_MALLOC_ALIGNMENT),)
libc_common_cflags += -DMALLOC_ALIGNMENT=$(BOARD_MALLOC_ALIGNMENT)
endif
+ifeq ($(BOARD_USES_LEGACY_MMAP),true)
+ libc_common_cflags += -DLEGACY_MMAP
+endif
+
# Define some common conlyflags
libc_common_conlyflags := \
-std=gnu99
@@ -1394,6 +1398,9 @@ LOCAL_SRC_FILES_arm += \
LOCAL_ADDRESS_SANITIZER := false
LOCAL_NATIVE_COVERAGE := $(bionic_coverage)
+# Allow devices to provide additional symbols
+LOCAL_WHOLE_STATIC_LIBRARIES += $(BOARD_PROVIDES_ADDITIONAL_BIONIC_STATIC_LIBS)
+
include $(BUILD_SHARED_LIBRARY)
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index d72a160..c2b80c5 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -20,7 +20,6 @@ libc_freebsd_src_files_arm += \
upstream-freebsd/lib/libc/string/wmemmove.c \
libc_openbsd_src_files_arm += \
- upstream-openbsd/lib/libc/string/memchr.c \
upstream-openbsd/lib/libc/string/memrchr.c \
upstream-openbsd/lib/libc/string/stpncpy.c \
upstream-openbsd/lib/libc/string/strlcat.c \
@@ -52,7 +51,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),)
endif
cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk
ifeq ($(wildcard $(cpu_variant_mk)),)
-$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
+$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, scorpion, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
endif
include $(cpu_variant_mk)
libc_common_additional_dependencies += $(cpu_variant_mk)
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
index a2e9c22..3692f04 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,191 +26,7 @@
* SUCH DAMAGE.
*/
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .syntax unified
-
- .thumb
- .thumb_func
-
-// Get the length of src string, then get the source of the dst string.
-// Check that the two lengths together don't exceed the threshold, then
-// do a memcpy of the data.
-ENTRY(__strcat_chk)
- pld [r0, #0]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
- push {r4, r5}
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r5, 4
-
- mov lr, r2
-
- // Save the dst register to r5
- mov r5, r0
-
- // Zero out r4
- eor r4, r4, r4
-
- // r1 contains the address of the string to count.
-.L_strlen_start:
- mov r0, r1
- ands r3, r1, #7
- beq .L_mainloop
-
- // Align to a double word (64 bits).
- rsb r3, r3, #8
- lsls ip, r3, #31
- beq .L_align_to_32
-
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_32:
- bcc .L_align_to_64
- ands ip, r3, #2
- beq .L_align_to_64
-
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_64:
- tst r3, #4
- beq .L_mainloop
- ldr r3, [r1], #4
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
-
- .p2align 2
-.L_mainloop:
- ldrd r2, r3, [r1], #8
-
- pld [r1, #64]
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .L_zero_in_first_register
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
- b .L_mainloop
-
-.L_update_count_and_finish:
- sub r3, r1, r0
- sub r3, r3, #1
- b .L_finish
-
-.L_zero_in_first_register:
- sub r3, r1, r0
- lsls r2, ip, #17
- bne .L_sub8_and_finish
- bcs .L_sub7_and_finish
- lsls ip, ip, #1
- bne .L_sub6_and_finish
-
- sub r3, r3, #5
- b .L_finish
-
-.L_sub8_and_finish:
- sub r3, r3, #8
- b .L_finish
-
-.L_sub7_and_finish:
- sub r3, r3, #7
- b .L_finish
-
-.L_sub6_and_finish:
- sub r3, r3, #6
- b .L_finish
-
-.L_zero_in_second_register:
- sub r3, r1, r0
- lsls r2, ip, #17
- bne .L_sub4_and_finish
- bcs .L_sub3_and_finish
- lsls ip, ip, #1
- bne .L_sub2_and_finish
-
- sub r3, r3, #1
- b .L_finish
-
-.L_sub4_and_finish:
- sub r3, r3, #4
- b .L_finish
-
-.L_sub3_and_finish:
- sub r3, r3, #3
- b .L_finish
-
-.L_sub2_and_finish:
- sub r3, r3, #2
-
-.L_finish:
- cmp r4, #0
- bne .L_strlen_done
-
- // Time to get the dst string length.
- mov r1, r5
-
- // Save the original source address to r5.
- mov r5, r0
-
- // Save the current length (adding 1 for the terminator).
- add r4, r3, #1
- b .L_strlen_start
-
- // r0 holds the pointer to the dst string.
- // r3 holds the dst string length.
- // r4 holds the src string length + 1.
-.L_strlen_done:
- add r2, r3, r4
- cmp r2, lr
- bhi __strcat_chk_failed
-
- // Set up the registers for the memcpy code.
- mov r1, r5
- pld [r1, #64]
- mov r2, r4
- add r0, r0, r3
- pop {r4, r5}
-END(__strcat_chk)
-
-#define MEMCPY_BASE __strcat_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
-
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__strcat_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r5, 4
-
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+4)
-END(__strcat_chk_failed)
-
- .data
-error_string:
- .string "strcat: prevented write past end of buffer"
+#include "__strcat_chk_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S
new file mode 100644
index 0000000..de66967
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .syntax unified
+
+ .thumb
+ .thumb_func
+
+// Get the length of src string, then get the source of the dst string.
+// Check that the two lengths together don't exceed the threshold, then
+// do a memcpy of the data.
+ENTRY(__strcat_chk)
+ pld [r0, #0]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+ push {r4, r5}
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+
+ mov lr, r2
+
+ // Save the dst register to r5
+ mov r5, r0
+
+ // Zero out r4
+ eor r4, r4, r4
+
+ // r1 contains the address of the string to count.
+.L_strlen_start:
+ mov r0, r1
+ ands r3, r1, #7
+ beq .L_mainloop
+
+ // Align to a double word (64 bits).
+ rsb r3, r3, #8
+ lsls ip, r3, #31
+ beq .L_align_to_32
+
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_32:
+ bcc .L_align_to_64
+ ands ip, r3, #2
+ beq .L_align_to_64
+
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_64:
+ tst r3, #4
+ beq .L_mainloop
+ ldr r3, [r1], #4
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+
+ .p2align 2
+.L_mainloop:
+ ldrd r2, r3, [r1], #8
+
+ pld [r1, #64]
+
+ sub ip, r2, #0x01010101
+ bic ip, ip, r2
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_first_register
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+ b .L_mainloop
+
+.L_update_count_and_finish:
+ sub r3, r1, r0
+ sub r3, r3, #1
+ b .L_finish
+
+.L_zero_in_first_register:
+ sub r3, r1, r0
+ lsls r2, ip, #17
+ bne .L_sub8_and_finish
+ bcs .L_sub7_and_finish
+ lsls ip, ip, #1
+ bne .L_sub6_and_finish
+
+ sub r3, r3, #5
+ b .L_finish
+
+.L_sub8_and_finish:
+ sub r3, r3, #8
+ b .L_finish
+
+.L_sub7_and_finish:
+ sub r3, r3, #7
+ b .L_finish
+
+.L_sub6_and_finish:
+ sub r3, r3, #6
+ b .L_finish
+
+.L_zero_in_second_register:
+ sub r3, r1, r0
+ lsls r2, ip, #17
+ bne .L_sub4_and_finish
+ bcs .L_sub3_and_finish
+ lsls ip, ip, #1
+ bne .L_sub2_and_finish
+
+ sub r3, r3, #1
+ b .L_finish
+
+.L_sub4_and_finish:
+ sub r3, r3, #4
+ b .L_finish
+
+.L_sub3_and_finish:
+ sub r3, r3, #3
+ b .L_finish
+
+.L_sub2_and_finish:
+ sub r3, r3, #2
+
+.L_finish:
+ cmp r4, #0
+ bne .L_strlen_done
+
+ // Time to get the dst string length.
+ mov r1, r5
+
+ // Save the original source address to r5.
+ mov r5, r0
+
+ // Save the current length (adding 1 for the terminator).
+ add r4, r3, #1
+ b .L_strlen_start
+
+ // r0 holds the pointer to the dst string.
+ // r3 holds the dst string length.
+ // r4 holds the src string length + 1.
+.L_strlen_done:
+ add r2, r3, r4
+ cmp r2, lr
+ bhi .L_strcat_chk_failed
+
+ // Set up the registers for the memcpy code.
+ mov r1, r5
+ pld [r1, #64]
+ mov r2, r4
+ add r0, r0, r3
+ pop {r4, r5}
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
+
+#include MEMCPY_BASE
+
+ // Undo the above cfi directives
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+.L_strcat_chk_failed:
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+4)
+END(__strcat_chk)
+
+ .data
+error_string:
+ .string "strcat: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
index db76686..d8cb3d9 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,155 +26,7 @@
* SUCH DAMAGE.
*/
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .syntax unified
-
- .thumb
- .thumb_func
-
-// Get the length of the source string first, then do a memcpy of the data
-// instead of a strcpy.
-ENTRY(__strcpy_chk)
- pld [r0, #0]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- mov lr, r2
- mov r0, r1
-
- ands r3, r1, #7
- beq .L_mainloop
-
- // Align to a double word (64 bits).
- rsb r3, r3, #8
- lsls ip, r3, #31
- beq .L_align_to_32
-
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_32:
- bcc .L_align_to_64
- ands ip, r3, #2
- beq .L_align_to_64
-
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_64:
- tst r3, #4
- beq .L_mainloop
- ldr r3, [r0], #4
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
-
- .p2align 2
-.L_mainloop:
- ldrd r2, r3, [r0], #8
-
- pld [r0, #64]
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .L_zero_in_first_register
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
- b .L_mainloop
-
-.L_update_count_and_finish:
- sub r3, r0, r1
- sub r3, r3, #1
- b .L_check_size
-
-.L_zero_in_first_register:
- sub r3, r0, r1
- lsls r2, ip, #17
- bne .L_sub8_and_finish
- bcs .L_sub7_and_finish
- lsls ip, ip, #1
- bne .L_sub6_and_finish
-
- sub r3, r3, #5
- b .L_check_size
-
-.L_sub8_and_finish:
- sub r3, r3, #8
- b .L_check_size
-
-.L_sub7_and_finish:
- sub r3, r3, #7
- b .L_check_size
-
-.L_sub6_and_finish:
- sub r3, r3, #6
- b .L_check_size
-
-.L_zero_in_second_register:
- sub r3, r0, r1
- lsls r2, ip, #17
- bne .L_sub4_and_finish
- bcs .L_sub3_and_finish
- lsls ip, ip, #1
- bne .L_sub2_and_finish
-
- sub r3, r3, #1
- b .L_check_size
-
-.L_sub4_and_finish:
- sub r3, r3, #4
- b .L_check_size
-
-.L_sub3_and_finish:
- sub r3, r3, #3
- b .L_check_size
-
-.L_sub2_and_finish:
- sub r3, r3, #2
-
-.L_check_size:
- pld [r1, #0]
- pld [r1, #64]
- ldr r0, [sp]
- cmp r3, lr
- bhs __strcpy_chk_failed
-
- // Add 1 for copy length to get the string terminator.
- add r2, r3, #1
-END(__strcpy_chk)
-
-#define MEMCPY_BASE __strcpy_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__strcpy_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+4)
-END(__strcpy_chk_failed)
-
- .data
-error_string:
- .string "strcpy: prevented write past end of buffer"
+#include "__strcpy_chk_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S
new file mode 100644
index 0000000..69ebcb4
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .syntax unified
+
+ .thumb
+ .thumb_func
+
+// Get the length of the source string first, then do a memcpy of the data
+// instead of a strcpy.
+ENTRY(__strcpy_chk)
+ pld [r0, #0]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+
+ mov lr, r2
+ mov r0, r1
+
+ ands r3, r1, #7
+ beq .L_mainloop
+
+ // Align to a double word (64 bits).
+ rsb r3, r3, #8
+ lsls ip, r3, #31
+ beq .L_align_to_32
+
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_32:
+ bcc .L_align_to_64
+ ands ip, r3, #2
+ beq .L_align_to_64
+
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_64:
+ tst r3, #4
+ beq .L_mainloop
+ ldr r3, [r0], #4
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+
+ .p2align 2
+.L_mainloop:
+ ldrd r2, r3, [r0], #8
+
+ pld [r0, #64]
+
+ sub ip, r2, #0x01010101
+ bic ip, ip, r2
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_first_register
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+ b .L_mainloop
+
+.L_update_count_and_finish:
+ sub r3, r0, r1
+ sub r3, r3, #1
+ b .L_check_size
+
+.L_zero_in_first_register:
+ sub r3, r0, r1
+ lsls r2, ip, #17
+ bne .L_sub8_and_finish
+ bcs .L_sub7_and_finish
+ lsls ip, ip, #1
+ bne .L_sub6_and_finish
+
+ sub r3, r3, #5
+ b .L_check_size
+
+.L_sub8_and_finish:
+ sub r3, r3, #8
+ b .L_check_size
+
+.L_sub7_and_finish:
+ sub r3, r3, #7
+ b .L_check_size
+
+.L_sub6_and_finish:
+ sub r3, r3, #6
+ b .L_check_size
+
+.L_zero_in_second_register:
+ sub r3, r0, r1
+ lsls r2, ip, #17
+ bne .L_sub4_and_finish
+ bcs .L_sub3_and_finish
+ lsls ip, ip, #1
+ bne .L_sub2_and_finish
+
+ sub r3, r3, #1
+ b .L_check_size
+
+.L_sub4_and_finish:
+ sub r3, r3, #4
+ b .L_check_size
+
+.L_sub3_and_finish:
+ sub r3, r3, #3
+ b .L_check_size
+
+.L_sub2_and_finish:
+ sub r3, r3, #2
+
+.L_check_size:
+ pld [r1, #0]
+ pld [r1, #64]
+ ldr r0, [sp]
+ cmp r3, lr
+ bhs .L_strcpy_chk_failed
+
+ // Add 1 for copy length to get the string terminator.
+ add r2, r3, #1
+
+#include MEMCPY_BASE
+
+.L_strcpy_chk_failed:
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+4)
+END(__strcpy_chk)
+
+ .data
+error_string:
+ .string "strcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index 410b663..537f3de 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,79 +25,8 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Prototype: void *memcpy (void *dst, const void *src, size_t count).
-
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
-
- .text
- .syntax unified
- .fpu neon
-
-ENTRY(__memcpy_chk)
- cmp r2, r3
- bhi __memcpy_chk_fail
-
- // Fall through to memcpy...
-END(__memcpy_chk)
-
-ENTRY(memcpy)
- pld [r1, #64]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-END(memcpy)
-
-#define MEMCPY_BASE __memcpy_base
-#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__memcpy_chk_fail)
- // Preserve lr for backtrace.
- push {lr}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset lr, 0
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+8)
-END(__memcpy_chk_fail)
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .data
-error_string:
- .string "memcpy: prevented write past end of buffer"
+#include "memcpy_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
index 2a73852..aac737d 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
@@ -53,11 +53,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-ENTRY_PRIVATE(MEMCPY_BASE)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
+.L_memcpy_base:
// Assumes that n >= 0, and dst, src are valid pointers.
// For any sizes less than 832 use the neon code that doesn't
// care about the src alignment. This avoids any checks
@@ -168,12 +164,6 @@ ENTRY_PRIVATE(MEMCPY_BASE)
eor r3, r0, r1
ands r3, r3, #0x3
bne .L_copy_unknown_alignment
-END(MEMCPY_BASE)
-
-ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
// To try and improve performance, stack layout changed,
// i.e., not keeping the stack looking like users expect
@@ -185,7 +175,7 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
strd r6, r7, [sp, #-8]!
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r6, 0
- .cfi_rel_offset r7, 0
+ .cfi_rel_offset r7, 4
strd r8, r9, [sp, #-8]!
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r8, 0
@@ -291,10 +281,28 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
// Restore registers: optimized pop {r0, pc}
ldrd r8, r9, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r8
+ .cfi_restore r9
ldrd r6, r7, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r6
+ .cfi_restore r7
ldrd r4, r5, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
pop {r0, pc}
+ // Put the cfi directives back for the below instructions.
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+ .cfi_rel_offset r6, 8
+ .cfi_rel_offset r7, 12
+ .cfi_rel_offset r8, 16
+ .cfi_rel_offset r9, 20
+
.L_dst_not_word_aligned:
// Align dst to word.
rsb ip, ip, #4
@@ -315,4 +323,12 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
// Src is guaranteed to be at least word aligned by this point.
b .L_word_aligned
-END(MEMCPY_BASE_ALIGNED)
+
+ // Undo any cfi directives from above.
+ .cfi_adjust_cfa_offset -24
+ .cfi_restore r4
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r9
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_common.S b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S
new file mode 100644
index 0000000..464fb46
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .text
+ .syntax unified
+ .fpu neon
+
+ENTRY(__memcpy_chk)
+ cmp r2, r3
+ bhi .L_memcpy_chk_fail
+
+ // Fall through to memcpy...
+END(__memcpy_chk)
+
+// Prototype: void *memcpy (void *dst, const void *src, size_t count).
+ENTRY(memcpy)
+ pld [r1, #64]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+
+#include MEMCPY_BASE
+
+ // Undo the cfi instructions from above.
+ .cfi_def_cfa_offset 0
+ .cfi_restore r0
+ .cfi_restore lr
+.L_memcpy_chk_fail:
+ // Preserve lr for backtrace.
+ push {lr}
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset lr, 0
+
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+8)
+END(memcpy)
+
+ .data
+error_string:
+ .string "memcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/strcat.S b/libc/arch-arm/cortex-a15/bionic/strcat.S
index b95be94..157cc9f 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcat.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcat.S
@@ -70,7 +70,7 @@
.macro m_scan_byte
ldrb r3, [r0]
- cbz r3, strcat_r0_scan_done
+ cbz r3, .L_strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
@@ -84,10 +84,10 @@ ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
- cbnz r2, strcat_continue
+ cbnz r2, .L_strcat_continue
bx lr
-strcat_continue:
+.L_strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
@@ -96,95 +96,102 @@ strcat_continue:
m_scan_byte
ands r3, r0, #7
- beq strcat_mainloop
+ beq .L_strcat_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcat_align_to_32
+ beq .L_strcat_align_to_32
ldrb r5, [r0]
- cbz r5, strcat_r0_scan_done
+ cbz r5, .L_strcat_r0_scan_done
add r0, r0, #1
-strcat_align_to_32:
- bcc strcat_align_to_64
+.L_strcat_align_to_32:
+ bcc .L_strcat_align_to_64
ldrb r2, [r0]
- cbz r2, strcat_r0_scan_done
+ cbz r2, .L_strcat_r0_scan_done
add r0, r0, #1
ldrb r4, [r0]
- cbz r4, strcat_r0_scan_done
+ cbz r4, .L_strcat_r0_scan_done
add r0, r0, #1
-strcat_align_to_64:
+.L_strcat_align_to_64:
tst r3, #4
- beq strcat_mainloop
+ beq .L_strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .L_strcat_zero_in_second_register
+ b .L_strcat_mainloop
-strcat_r0_scan_done:
+.L_strcat_r0_scan_done:
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
-
-strcpy_finish:
+ m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
+
+.L_strcpy_finish:
m_pop
-strcpy_continue:
+.L_strcpy_continue:
ands r3, r0, #7
- beq strcpy_check_src_align
+ beq .L_strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcpy_align_to_32
+ beq .L_strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
-strcpy_align_to_32:
- bcc strcpy_align_to_64
+.L_strcpy_align_to_32:
+ bcc .L_strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
-strcpy_align_to_64:
+.L_strcpy_align_to_64:
tst r3, #4
- beq strcpy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
- str r2, [r0], #4
+ beq .L_strcpy_check_src_align
+ // Read one byte at a time since we don't know the src alignment
+ // and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
-strcpy_check_src_align:
+.L_strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
- bne strcpy_unaligned_copy
+ bne .L_strcpy_unaligned_copy
.p2align 2
-strcpy_mainloop:
+.L_strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@@ -192,128 +199,128 @@ strcpy_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_mainloop
+ b .L_strcpy_mainloop
-strcpy_complete:
+.L_strcpy_complete:
m_pop
-strcpy_zero_in_first_register:
+.L_strcpy_zero_in_first_register:
lsls lr, ip, #17
- bne strcpy_copy1byte
- bcs strcpy_copy2bytes
+ bne .L_strcpy_copy1byte
+ bcs .L_strcpy_copy2bytes
lsls ip, ip, #1
- bne strcpy_copy3bytes
+ bne .L_strcpy_copy3bytes
-strcpy_copy4bytes:
+.L_strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
-strcpy_copy1byte:
+.L_strcpy_copy1byte:
strb r2, [r0]
m_pop
-strcpy_copy2bytes:
+.L_strcpy_copy2bytes:
strh r2, [r0]
m_pop
-strcpy_copy3bytes:
+.L_strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
-strcpy_zero_in_second_register:
+.L_strcpy_zero_in_second_register:
lsls lr, ip, #17
- bne strcpy_copy5bytes
- bcs strcpy_copy6bytes
+ bne .L_strcpy_copy5bytes
+ bcs .L_strcpy_copy6bytes
lsls ip, ip, #1
- bne strcpy_copy7bytes
+ bne .L_strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
-strcpy_copy5bytes:
+.L_strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
-strcpy_copy6bytes:
+.L_strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
-strcpy_copy7bytes:
+.L_strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
-strcpy_unaligned_copy:
+.L_strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
-strcpy_unaligned_branchtable:
+.L_strcpy_unaligned_branchtable:
.byte 0
- .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.L_strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r3, [r1]
- cbz r3, strcpy_unalign7_copy5bytes
+ cbz r3, .L_strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign7_copy6bytes
+ cbz r4, .L_strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
- cbz r5, strcpy_unalign7_copy7bytes
+ cbz r5, .L_strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
- beq strcpy_unalign_return
- b strcpy_unalign7
+ beq .L_strcpy_unalign_return
+ b .L_strcpy_unalign7
-strcpy_unalign7_copy5bytes:
+.L_strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
-strcpy_unalign_return:
+.L_strcpy_unalign_return:
m_pop
-strcpy_unalign7_copy6bytes:
+.L_strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
-strcpy_unalign7_copy7bytes:
+.L_strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
@@ -322,41 +329,41 @@ strcpy_unalign7_copy7bytes:
.p2align 2
// Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.L_strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .L_strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
- cbz r5, strcpy_unalign_copy6bytes
+ cbz r5, .L_strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
- beq strcpy_copy7bytes
+ beq .L_strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
- beq strcpy_unalign_return
- b strcpy_unalign6
+ beq .L_strcpy_unalign_return
+ b .L_strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.L_strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .L_strcpy_unalign_copy5bytes
ldr r3, [r1], #4
@@ -365,17 +372,17 @@ strcpy_unalign5:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign5
+ b .L_strcpy_unalign5
-strcpy_unalign_copy5bytes:
+.L_strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
-strcpy_unalign_copy6bytes:
+.L_strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
@@ -383,13 +390,13 @@ strcpy_unalign_copy6bytes:
.p2align 2
// Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.L_strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
@@ -397,20 +404,20 @@ strcpy_unalign4:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign4
+ b .L_strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.L_strcpy_unalign3:
ldrb r2, [r1]
- cbz r2, strcpy_unalign3_copy1byte
+ cbz r2, .L_strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign3_copy2bytes
+ cbz r3, .L_strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
- cbz r4, strcpy_unalign3_copy3bytes
+ cbz r4, .L_strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -418,26 +425,26 @@ strcpy_unalign3:
pld [r1, #64]
lsrs lr, r2, #24
- beq strcpy_copy4bytes
+ beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign3
+ b .L_strcpy_unalign3
-strcpy_unalign3_copy1byte:
+.L_strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
-strcpy_unalign3_copy2bytes:
+.L_strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
-strcpy_unalign3_copy3bytes:
+.L_strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
@@ -445,34 +452,34 @@ strcpy_unalign3_copy3bytes:
.p2align 2
// Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.L_strcpy_unalign2:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .L_strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign_copy2bytes
+ cbz r4, .L_strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
- beq strcpy_copy3bytes
+ beq .L_strcpy_copy3bytes
lsrs ip, r2, #24
- beq strcpy_copy4bytes
+ beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign2
+ b .L_strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.L_strcpy_unalign1:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .L_strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -482,27 +489,27 @@ strcpy_unalign1:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign1
+ b .L_strcpy_unalign1
-strcpy_unalign_copy1byte:
+.L_strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
-strcpy_unalign_copy2bytes:
+.L_strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
-strcat_mainloop:
+.L_strcat_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
@@ -510,59 +517,59 @@ strcat_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcat_zero_in_first_register
+ bne .L_strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .L_strcat_zero_in_second_register
+ b .L_strcat_mainloop
-strcat_zero_in_first_register:
+.L_strcat_zero_in_first_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
- bne strcat_sub8
- bcs strcat_sub7
+ bne .L_strcat_sub8
+ bcs .L_strcat_sub7
lsls ip, ip, #1
- bne strcat_sub6
+ bne .L_strcat_sub6
sub r0, r0, #5
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub8:
+.L_strcat_sub8:
sub r0, r0, #8
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub7:
+.L_strcat_sub7:
sub r0, r0, #7
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub6:
+.L_strcat_sub6:
sub r0, r0, #6
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_zero_in_second_register:
+.L_strcat_zero_in_second_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
- bne strcat_sub4
- bcs strcat_sub3
+ bne .L_strcat_sub4
+ bcs .L_strcat_sub3
lsls ip, ip, #1
- bne strcat_sub2
+ bne .L_strcat_sub2
sub r0, r0, #1
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub4:
+.L_strcat_sub4:
sub r0, r0, #4
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub3:
+.L_strcat_sub3:
sub r0, r0, #3
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub2:
+.L_strcat_sub2:
sub r0, r0, #2
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
END(strcat)
diff --git a/libc/arch-arm/cortex-a15/bionic/string_copy.S b/libc/arch-arm/cortex-a15/bionic/string_copy.S
index 20f0e91..92d1c98 100644
--- a/libc/arch-arm/cortex-a15/bionic/string_copy.S
+++ b/libc/arch-arm/cortex-a15/bionic/string_copy.S
@@ -149,13 +149,20 @@ ENTRY(strcpy)
.Lstringcopy_align_to_64:
tst r3, #4
beq .Lstringcopy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .Lstringcopy_zero_in_first_register
- str r2, [r0], #4
+ // Read one byte at a time since we don't have any idea about the alignment
+ // of the source and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
.Lstringcopy_check_src_align:
// At this point dst is aligned to a double word, check if src
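
This byte-at-a-time rewrite (mirrored in strcat.S above) is the substantive bug fix in these files: while the source alignment is still unknown, a word-sized ldr can straddle a page boundary, so if the string's terminator sits just before an unmapped page the wide load faults even though the copy never needed those bytes. Once the pointer is aligned, a wide load stays within one page. A standalone C sketch of that reasoning:

#include <stdint.h>
#include <stdio.h>

/* An N-byte load at an N-byte-aligned address lies entirely within
   one page (page sizes are multiples of N), so it can never touch
   an unmapped page beyond the string. Unaligned wide loads carry no
   such guarantee -- hence the single-byte copies until aligned. */
int main(void) {
  const char *src = "hello";
  uintptr_t addr = (uintptr_t)src;
  unsigned lead = (unsigned)((8 - (addr & 7)) & 7);

  if (lead == 0)
    puts("already 8-byte aligned: wide loads are page-safe");
  else
    printf("copy %u byte(s) first, then switch to wide loads\n", lead);
  return 0;
}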
diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S
index 9a0ce62..4fd6284 100644
--- a/libc/arch-arm/cortex-a15/bionic/strlen.S
+++ b/libc/arch-arm/cortex-a15/bionic/strlen.S
@@ -65,38 +65,38 @@ ENTRY(strlen)
mov r1, r0
ands r3, r0, #7
- beq mainloop
+ beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq align_to_32
+ beq .L_align_to_32
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
-align_to_32:
- bcc align_to_64
+.L_align_to_32:
+ bcc .L_align_to_64
ands ip, r3, #2
- beq align_to_64
+ beq .L_align_to_64
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
-align_to_64:
+.L_align_to_64:
tst r3, #4
- beq mainloop
+ beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne zero_in_second_register
+ bne .L_zero_in_second_register
.p2align 2
-mainloop:
+.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@@ -104,62 +104,62 @@ mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne zero_in_first_register
+ bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne zero_in_second_register
- b mainloop
+ bne .L_zero_in_second_register
+ b .L_mainloop
-update_count_and_return:
+.L_update_count_and_return:
sub r0, r1, r0
sub r0, r0, #1
bx lr
-zero_in_first_register:
+.L_zero_in_first_register:
sub r0, r1, r0
lsls r3, ip, #17
- bne sub8_and_return
- bcs sub7_and_return
+ bne .L_sub8_and_return
+ bcs .L_sub7_and_return
lsls ip, ip, #1
- bne sub6_and_return
+ bne .L_sub6_and_return
sub r0, r0, #5
bx lr
-sub8_and_return:
+.L_sub8_and_return:
sub r0, r0, #8
bx lr
-sub7_and_return:
+.L_sub7_and_return:
sub r0, r0, #7
bx lr
-sub6_and_return:
+.L_sub6_and_return:
sub r0, r0, #6
bx lr
-zero_in_second_register:
+.L_zero_in_second_register:
sub r0, r1, r0
lsls r3, ip, #17
- bne sub4_and_return
- bcs sub3_and_return
+ bne .L_sub4_and_return
+ bcs .L_sub3_and_return
lsls ip, ip, #1
- bne sub2_and_return
+ bne .L_sub2_and_return
sub r0, r0, #1
bx lr
-sub4_and_return:
+.L_sub4_and_return:
sub r0, r0, #4
bx lr
-sub3_and_return:
+.L_sub3_and_return:
sub r0, r0, #3
bx lr
-sub2_and_return:
+.L_sub2_and_return:
sub r0, r0, #2
bx lr
END(strlen)
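
A note on the zero-byte test that appears throughout these routines: the recurring sub/bic/ands triple with the constants 0x01010101 and 0x80808080 is the standard branch-free check for a NUL byte anywhere in a 32-bit word (the label renames above leave it untouched). The same test in standalone C:

#include <stdint.h>
#include <stdio.h>

/* (w - 0x01010101) sets a byte's top bit where that byte was 0x00
   (it wraps to 0xff) or was >= 0x81; ANDing with ~w keeps only
   bytes whose top bit was clear in w, i.e. exactly the zero bytes.
   bic/ands in the assembly compute the same expression. */
static int has_zero_byte(uint32_t w) {
  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

int main(void) {
  printf("%d\n", has_zero_byte(0x41004243u)); /* 1: contains a 0x00 byte */
  printf("%d\n", has_zero_byte(0x41424344u)); /* 0: no zero byte */
  return 0;
}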
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index 6fa3270..202a3bf 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
libc_bionic_src_files_arm += \
diff --git a/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk
new file mode 100644
index 0000000..5d7efc6
--- /dev/null
+++ b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk
@@ -0,0 +1,22 @@
+# This file holds the optimized routines that are the best middle ground
+# when running on a big.LITTLE system pairing cortex-a57 with cortex-a53.
+# The cortex-a7 and cortex-a53 optimized routines decrease performance on
+# cortex-a57 processors by as much as 20%.
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/memset.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memcmp.S \
+ arch-arm/generic/bionic/memchr.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S
new file mode 100644
index 0000000..c5bc98a
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/__strcat_chk_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S
new file mode 100644
index 0000000..1f8945d
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/__strcpy_chk_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy.S b/libc/arch-arm/cortex-a53/bionic/memcpy.S
new file mode 100644
index 0000000..664f574
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/memcpy.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/memcpy_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy_base.S b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S
new file mode 100644
index 0000000..2749fc8
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+.L_memcpy_base:
+ // Assumes that n >= 0, and dst, src are valid pointers.
+ cmp r2, #16
+ blo .L_copy_less_than_16_unknown_align
+
+.L_copy_unknown_alignment:
+ // Unknown alignment of src and dst.
+ // Assumes that the first few bytes have already been prefetched.
+
+ // Align destination to 128 bits. The mainloop store instructions
+ // require this alignment or they will raise an alignment fault.
+ rsb r3, r0, #0
+ ands r3, r3, #0xF
+ beq 2f
+
+ // Copy up to 15 bytes (count in r3).
+ sub r2, r2, r3
+ movs ip, r3, lsl #31
+
+ itt mi
+ ldrbmi lr, [r1], #1
+ strbmi lr, [r0], #1
+ itttt cs
+ ldrbcs ip, [r1], #1
+ ldrbcs lr, [r1], #1
+ strbcs ip, [r0], #1
+ strbcs lr, [r0], #1
+
+ movs ip, r3, lsl #29
+ bge 1f
+ // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
+1: bcc 2f
+ // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0, :64]!
+
+2: // Make sure we have at least 64 bytes to copy.
+ subs r2, r2, #64
+ blo 2f
+
+1: // The main loop copies 64 bytes at a time.
+ vld1.8 {d0 - d3}, [r1]!
+ vld1.8 {d4 - d7}, [r1]!
+ subs r2, r2, #64
+ vstmia r0!, {d0 - d7}
+ pld [r1, #(64*10)]
+ bhs 1b
+
+2: // Fix-up the remaining count and make sure we have >= 32 bytes left.
+ adds r2, r2, #32
+ blo 3f
+
+ // Copy 32 bytes. These cache lines were already preloaded.
+ vld1.8 {d0 - d3}, [r1]!
+ sub r2, r2, #32
+ vst1.8 {d0 - d3}, [r0, :128]!
+3: // Less than 32 left.
+ add r2, r2, #32
+ tst r2, #0x10
+ beq .L_copy_less_than_16_unknown_align
+ // Copies 16 bytes, destination 128 bits aligned.
+ vld1.8 {d0, d1}, [r1]!
+ vst1.8 {d0, d1}, [r0, :128]!
+
+.L_copy_less_than_16_unknown_align:
+ // Copy up to 15 bytes (count in r2).
+ movs ip, r2, lsl #29
+ bcc 1f
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0]!
+1: bge 2f
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
+
+2: // Copy 0 to 4 bytes.
+ lsls r2, r2, #31
+ itt ne
+ ldrbne lr, [r1], #1
+ strbne lr, [r0], #1
+ itttt cs
+ ldrbcs ip, [r1], #1
+ ldrbcs lr, [r1]
+ strbcs ip, [r0], #1
+ strbcs lr, [r0]
+
+ pop {r0, pc}
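A note on the movs ... lsl #31 / lsl #29 idiom used in the alignment prologue and the
tail above: shifting the residual count left drops its low bits into the condition
flags, so the predicated load/store pairs copy exactly the 1-, 2-, 4-, and 8-byte
pieces of the remainder without extra compare instructions. A C model of the same
decomposition (a sketch; the assembly predicates individual loads and stores rather
than calling memcpy):

    #include <stdint.h>
    #include <string.h>

    // Copy a 0-15 byte tail by testing one bit of n per step; lsl #29
    // exposes bits 3 and 2 (8- and 4-byte pieces), lsl #31 bits 1 and 0.
    static void copy_tail(uint8_t* dst, const uint8_t* src, uint32_t n) {
        if (n & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
        if (n & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
        if (n & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
        if (n & 1) { *dst = *src; }
    }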
diff --git a/libc/arch-arm/cortex-a53/cortex-a53.mk b/libc/arch-arm/cortex-a53/cortex-a53.mk
index b5c337c..14aaa71 100644
--- a/libc/arch-arm/cortex-a53/cortex-a53.mk
+++ b/libc/arch-arm/cortex-a53/cortex-a53.mk
@@ -1 +1,21 @@
-include bionic/libc/arch-arm/cortex-a7/cortex-a7.mk
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a53/bionic/memcpy.S \
+ arch-arm/cortex-a53/bionic/__strcat_chk.S \
+ arch-arm/cortex-a53/bionic/__strcpy_chk.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a7/bionic/memset.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a7/bionic/memset.S b/libc/arch-arm/cortex-a7/bionic/memset.S
new file mode 100644
index 0000000..6365b06
--- /dev/null
+++ b/libc/arch-arm/cortex-a7/bionic/memset.S
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/cpu-features.h>
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ /*
+ * Optimized memset() for ARM.
+ *
+ * memset() returns its first argument.
+ */
+
+ .fpu neon
+ .syntax unified
+
+ENTRY(__memset_chk)
+ cmp r2, r3
+ bls .L_done
+
+ // Preserve lr for backtrace.
+ push {lr}
+ .cfi_def_cfa_offset 4
+ .cfi_rel_offset lr, 0
+
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+8)
+END(__memset_chk)
+
+ENTRY(bzero)
+ mov r2, r1
+ mov r1, #0
+.L_done:
+ // Fall through to memset...
+END(bzero)
+
+ENTRY(memset)
+ mov r3, r0
+ // At this point only d0, d1 are going to be used below.
+ vdup.8 q0, r1
+ cmp r2, #16
+ blo .L_set_less_than_16_unknown_align
+
+.L_check_alignment:
+ // Align destination to a double word to avoid the store crossing
+ // a cache line boundary.
+ ands ip, r3, #7
+ bne .L_do_double_word_align
+
+.L_double_word_aligned:
+ // Duplicate q0 so that the less-than-64 path can use d2, d3.
+ vmov q1, q0
+ subs r2, #64
+ blo .L_set_less_than_64
+
+ // Duplicate the fill value so that we can store 64 bytes at a time.
+ vmov q2, q0
+ vmov q3, q0
+
+1: // Main loop stores 64 bytes at a time.
+ subs r2, #64
+ vstmia r3!, {d0 - d7}
+ bge 1b
+
+.L_set_less_than_64:
+ // Restore r2 to the count of bytes left to set.
+ add r2, #64
+ lsls ip, r2, #27
+ bcc .L_set_less_than_32
+ // Set 32 bytes.
+ vstmia r3!, {d0 - d3}
+
+.L_set_less_than_32:
+ bpl .L_set_less_than_16
+ // Set 16 bytes.
+ vstmia r3!, {d0, d1}
+
+.L_set_less_than_16:
+ // Less than 16 bytes to set.
+ lsls ip, r2, #29
+ bcc .L_set_less_than_8
+
+ // Set 8 bytes.
+ vstmia r3!, {d0}
+
+.L_set_less_than_8:
+ bpl .L_set_less_than_4
+ // Set 4 bytes
+ vst1.32 {d0[0]}, [r3]!
+
+.L_set_less_than_4:
+ lsls ip, r2, #31
+ it ne
+ strbne r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3]
+ bx lr
+
+.L_do_double_word_align:
+ rsb ip, ip, #8
+ sub r2, r2, ip
+
+ // Do this comparison now, otherwise we'll need to save a
+ // register to the stack since we've used all available
+ // registers.
+ cmp ip, #4
+ blo 1f
+
+ // Need a four byte store to finish the alignment.
+ movs ip, ip, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ vst1.32 {d0[0]}, [r3]!
+ b .L_double_word_aligned
+
+1:
+ // No four byte store needed.
+ movs ip, ip, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ b .L_double_word_aligned
+
+.L_set_less_than_16_unknown_align:
+ // Set up to 15 bytes.
+ movs ip, r2, lsl #29
+ bcc 1f
+ vst1.8 {d0}, [r3]!
+1: bge 2f
+ vst1.32 {d0[0]}, [r3]!
+2: movs ip, r2, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ bx lr
+END(memset)
+
+ .data
+error_string:
+ .string "memset: prevented write past end of buffer"
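The __memset_chk entry at the top of this file is the FORTIFY guard: r2 is the requested
length, r3 the compiler-known size of the destination, and the "cmp r2, r3; bls .L_done"
branch lands on a label placed at the end of bzero so the success path falls straight
into memset. Roughly, in C (a sketch; the __fortify_chk_fail prototype is assumed from
how the asm loads r0/r1):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <private/libc_events.h>  // BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW

    // Assumed prototype: message in r0, event tag in r1 (see the
    // error_message/error_code words in the assembly).
    extern void __fortify_chk_fail(const char* msg, uint32_t event);

    void* __memset_chk_model(void* dst, int byte, size_t n, size_t dst_len) {
        if (n > dst_len) {
            __fortify_chk_fail("memset: prevented write past end of buffer",
                               BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW);
        }
        return memset(dst, byte, n);
    }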
diff --git a/libc/arch-arm/cortex-a7/cortex-a7.mk b/libc/arch-arm/cortex-a7/cortex-a7.mk
index 9af03d9..3629a57 100644
--- a/libc/arch-arm/cortex-a7/cortex-a7.mk
+++ b/libc/arch-arm/cortex-a7/cortex-a7.mk
@@ -1 +1,19 @@
-include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a7/bionic/memset.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index 5e81305..6ab5a69 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -133,8 +133,7 @@ ENTRY_PRIVATE(MEMCPY_BASE)
strbcs ip, [r0], #1
strbcs lr, [r0], #1
- ldmfd sp!, {r0, lr}
- bx lr
+ ldmfd sp!, {r0, pc}
END(MEMCPY_BASE)
ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index 8ee6ac2..b39fcc4 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -69,12 +69,9 @@ END(bzero)
ENTRY(memset)
// The neon memset only wins for less than 132.
cmp r2, #132
- bhi __memset_large_copy
-
- stmfd sp!, {r0}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset r0, 0
+ bhi .L_memset_large_copy
+ mov r3, r0
vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */
@@ -84,7 +81,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
- vst1.8 {d0 - d3}, [r0]!
+ vst1.8 {d0 - d3}, [r3]!
bhs 1b
2: /* less than 32 left */
@@ -93,22 +90,20 @@ ENTRY(memset)
beq 3f
// writes 16 bytes, 128-bits aligned
- vst1.8 {d0, d1}, [r0]!
+ vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
- vst1.8 {d0}, [r0]!
+ vst1.8 {d0}, [r3]!
1: bge 2f
- vst1.32 {d0[0]}, [r0]!
+ vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31
- strbmi r1, [r0], #1
- strbcs r1, [r0], #1
- strbcs r1, [r0], #1
- ldmfd sp!, {r0}
+ strbmi r1, [r3], #1
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
bx lr
-END(memset)
-ENTRY_PRIVATE(__memset_large_copy)
+.L_memset_large_copy:
/* compute the offset to align the destination
* offset = (4-(src&3))&3 = -src & 3
*/
@@ -136,8 +131,7 @@ ENTRY_PRIVATE(__memset_large_copy)
strbcs r1, [r0], #1
strbmi r1, [r0], #1
subs r2, r2, r3
- popls {r0, r4-r7, lr} /* return */
- bxls lr
+ popls {r0, r4-r7, pc} /* return */
/* align the destination to a cache-line */
mov r12, r1
@@ -180,9 +174,8 @@ ENTRY_PRIVATE(__memset_large_copy)
strhmi r1, [r0], #2
movs r2, r2, lsl #2
strbcs r1, [r0]
- ldmfd sp!, {r0, r4-r7, lr}
- bx lr
-END(__memset_large_copy)
+ ldmfd sp!, {r0, r4-r7, pc}
+END(memset)
.data
error_string:
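This a9 memset rewrite follows the same shape as the new cortex-a7 file: rather than
pushing r0 so it can later be popped as the return value, the small-set path writes
through a scratch cursor in r3 and leaves r0 alone, and the large-copy code becomes a
local label inside a single ENTRY/END pair. The register discipline, sketched in C:

    #include <stddef.h>

    // Shape of the refactor: advance a scratch cursor (r3 in the new
    // code) so the return value (r0) never takes a stack round-trip.
    static void* memset_shape(void* dst, int c, size_t n) {
        unsigned char* cursor = (unsigned char*) dst;
        while (n-- != 0) *cursor++ = (unsigned char) c;
        return dst;  // r0, never spilled
    }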
diff --git a/libc/arch-arm/cortex-a9/bionic/strcat.S b/libc/arch-arm/cortex-a9/bionic/strcat.S
index f5a855e..9077a74 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcat.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcat.S
@@ -70,7 +70,7 @@
.macro m_scan_byte
ldrb r3, [r0]
- cbz r3, strcat_r0_scan_done
+ cbz r3, .Lstrcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
@@ -84,10 +84,10 @@ ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
- cbnz r2, strcat_continue
+ cbnz r2, .Lstrcat_continue
bx lr
-strcat_continue:
+.Lstrcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
@@ -96,10 +96,10 @@ strcat_continue:
m_scan_byte
ands r3, r0, #7
- bne strcat_align_src
+ bne .Lstrcat_align_src
.p2align 2
-strcat_mainloop:
+.Lstrcat_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
@@ -107,28 +107,28 @@ strcat_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcat_zero_in_first_register
+ bne .Lstrcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .Lstrcat_zero_in_second_register
+ b .Lstrcat_mainloop
-strcat_zero_in_first_register:
+.Lstrcat_zero_in_first_register:
sub r0, r0, #4
-strcat_zero_in_second_register:
+.Lstrcat_zero_in_second_register:
// Check for zero in byte 0.
tst ip, #0x80
it ne
subne r0, r0, #4
- bne strcat_r0_scan_done
+ bne .Lstrcat_r0_scan_done
// Check for zero in byte 1.
tst ip, #0x8000
it ne
subne r0, r0, #3
- bne strcat_r0_scan_done
+ bne .Lstrcat_r0_scan_done
// Check for zero in byte 2.
tst ip, #0x800000
it ne
@@ -137,33 +137,33 @@ strcat_zero_in_second_register:
// Zero is in byte 3.
subeq r0, r0, #1
-strcat_r0_scan_done:
+.Lstrcat_r0_scan_done:
// Unroll the first 8 bytes that will be copied.
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
-
-strcpy_finish:
+ m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue
+
+.Lstrcpy_finish:
m_ret inst=pop
-strcpy_continue:
+.Lstrcpy_continue:
pld [r1, #0]
ands r3, r0, #7
- bne strcpy_align_dst
+ bne .Lstrcpy_align_dst
-strcpy_check_src_align:
+.Lstrcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
- bne strcpy_unaligned_copy
+ bne .Lstrcpy_unaligned_copy
.p2align 2
-strcpy_mainloop:
+.Lstrcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
@@ -171,17 +171,17 @@ strcpy_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_mainloop
+ b .Lstrcpy_mainloop
-strcpy_zero_in_first_register:
+.Lstrcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
@@ -198,7 +198,7 @@ strcpy_zero_in_first_register:
strb r3, [r0]
m_ret inst=pop
-strcpy_zero_in_second_register:
+.Lstrcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
@@ -218,18 +218,18 @@ strcpy_zero_in_second_register:
strb r4, [r0]
m_ret inst=pop
-strcpy_align_dst:
+.Lstrcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcpy_align_to_32
+ beq .Lstrcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .Lstrcpy_complete
-strcpy_align_to_32:
- bcc strcpy_align_to_64
+.Lstrcpy_align_to_32:
+ bcc .Lstrcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
@@ -242,76 +242,83 @@ strcpy_align_to_32:
it eq
m_ret inst=popeq
-strcpy_align_to_64:
+.Lstrcpy_align_to_64:
tst r3, #4
- beq strcpy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
- stmia r0!, {r2}
- b strcpy_check_src_align
+ beq .Lstrcpy_check_src_align
+ // Read one byte at a time since we don't know the src alignment
+ // and we don't want to read into a different page.
+ ldrb r4, [r1], #1
+ strb r4, [r0], #1
+ cbz r4, .Lstrcpy_complete
+ ldrb r5, [r1], #1
+ strb r5, [r0], #1
+ cbz r5, .Lstrcpy_complete
+ ldrb r4, [r1], #1
+ strb r4, [r0], #1
+ cbz r4, .Lstrcpy_complete
+ ldrb r5, [r1], #1
+ strb r5, [r0], #1
+ cbz r5, .Lstrcpy_complete
+ b .Lstrcpy_check_src_align
-strcpy_complete:
+.Lstrcpy_complete:
m_ret inst=pop
-strcpy_unaligned_copy:
+.Lstrcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
-strcpy_unaligned_branchtable:
+.Lstrcpy_unaligned_branchtable:
.byte 0
- .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.Lstrcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r3, [r1]
- cbz r3, strcpy_unalign7_copy5bytes
+ cbz r3, .Lstrcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign7_copy6bytes
+ cbz r4, .Lstrcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
- cbz r5, strcpy_unalign7_copy7bytes
+ cbz r5, .Lstrcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
- beq strcpy_unalign_return
- b strcpy_unalign7
+ beq .Lstrcpy_unalign_return
+ b .Lstrcpy_unalign7
-strcpy_unalign7_copy5bytes:
+.Lstrcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
-strcpy_unalign_return:
+.Lstrcpy_unalign_return:
m_ret inst=pop
-strcpy_unalign7_copy6bytes:
+.Lstrcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
-strcpy_unalign7_copy7bytes:
+.Lstrcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
@@ -320,30 +327,30 @@ strcpy_unalign7_copy7bytes:
.p2align 2
// Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.Lstrcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .Lstrcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
- cbz r5, strcpy_unalign_copy6bytes
+ cbz r5, .Lstrcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
- beq strcpy_unalign6_copy7bytes
+ beq .Lstrcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
- beq strcpy_unalign_return
- b strcpy_unalign6
+ beq .Lstrcpy_unalign_return
+ b .Lstrcpy_unalign6
-strcpy_unalign6_copy7bytes:
+.Lstrcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
@@ -352,16 +359,16 @@ strcpy_unalign6_copy7bytes:
.p2align 2
// Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.Lstrcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .Lstrcpy_unalign_copy5bytes
ldr r3, [r1], #4
@@ -370,17 +377,17 @@ strcpy_unalign5:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign5
+ b .Lstrcpy_unalign5
-strcpy_unalign_copy5bytes:
+.Lstrcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
-strcpy_unalign_copy6bytes:
+.Lstrcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
@@ -388,13 +395,13 @@ strcpy_unalign_copy6bytes:
.p2align 2
// Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.Lstrcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
@@ -402,20 +409,20 @@ strcpy_unalign4:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign4
+ b .Lstrcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.Lstrcpy_unalign3:
ldrb r2, [r1]
- cbz r2, strcpy_unalign3_copy1byte
+ cbz r2, .Lstrcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign3_copy2bytes
+ cbz r3, .Lstrcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
- cbz r4, strcpy_unalign3_copy3bytes
+ cbz r4, .Lstrcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -423,26 +430,26 @@ strcpy_unalign3:
pld [r1, #64]
lsrs lr, r2, #24
- beq strcpy_unalign_copy4bytes
+ beq .Lstrcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign3
+ b .Lstrcpy_unalign3
-strcpy_unalign3_copy1byte:
+.Lstrcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign3_copy2bytes:
+.Lstrcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
-strcpy_unalign3_copy3bytes:
+.Lstrcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
@@ -450,34 +457,34 @@ strcpy_unalign3_copy3bytes:
.p2align 2
// Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.Lstrcpy_unalign2:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .Lstrcpy_unalign_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign_copy2bytes
+ cbz r3, .Lstrcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
- beq strcpy_unalign_copy3bytes
+ beq .Lstrcpy_unalign_copy3bytes
lsrs ip, r2, #24
- beq strcpy_unalign_copy4bytes
+ beq .Lstrcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign2
+ b .Lstrcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.Lstrcpy_unalign1:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .Lstrcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -487,62 +494,62 @@ strcpy_unalign1:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign1
+ b .Lstrcpy_unalign1
-strcpy_unalign_copy1byte:
+.Lstrcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign_copy2bytes:
+.Lstrcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
-strcpy_unalign_copy3bytes:
+.Lstrcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign_copy4bytes:
+.Lstrcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
-strcat_align_src:
+.Lstrcat_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcat_align_to_32
+ beq .Lstrcat_align_to_32
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
-strcat_align_to_32:
- bcc strcat_align_to_64
+.Lstrcat_align_to_32:
+ bcc .Lstrcat_align_to_64
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
-strcat_align_to_64:
+.Lstrcat_align_to_64:
tst r3, #4
- beq strcat_mainloop
+ beq .Lstrcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .Lstrcat_zero_in_second_register
+ b .Lstrcat_mainloop
-strcat_r0_update:
+.Lstrcat_r0_update:
sub r0, r0, #1
- b strcat_r0_scan_done
+ b .Lstrcat_r0_scan_done
END(strcat)
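The unalignN paths and the tbb table in this file encode one invariant: with dst already
8-byte aligned, the source's offset within its own 8-byte unit bounds how many bytes can
be read before a page boundary might intervene, because page boundaries are themselves
8-byte aligned. (The table entries are (label - table)/2 because tbb scales its byte
offsets by two.) The bound, as a one-line C helper (name is illustrative):

    #include <stdint.h>

    // Bytes at p guaranteed to share p's page: the distance to the next
    // 8-byte boundary. p % 8 == 1 gives 7 (the unalign7 path),
    // p % 8 == 7 gives 1 (unalign1), and an aligned p gives 8.
    static inline unsigned page_safe_bytes(uintptr_t p) {
        return 8u - (unsigned) (p & 7u);
    }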
diff --git a/libc/arch-arm/cortex-a9/bionic/string_copy.S b/libc/arch-arm/cortex-a9/bionic/string_copy.S
index caf5a11..642db0f 100644
--- a/libc/arch-arm/cortex-a9/bionic/string_copy.S
+++ b/libc/arch-arm/cortex-a9/bionic/string_copy.S
@@ -244,13 +244,20 @@ ENTRY(strcpy)
.Lstringcopy_align_to_64:
tst r3, #4
beq .Lstringcopy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .Lstringcopy_zero_in_first_register
- stmia r0!, {r2}
+ // Read one byte at a time since we don't have any idea about the alignment
+ // of the source and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
b .Lstringcopy_check_src_align
.Lstringcopy_complete:
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 7b38de1..db4bcc7 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a9/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
libc_bionic_src_files_arm += \
diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk
index 5fddf95..e81f8c7 100644
--- a/libc/arch-arm/denver/denver.mk
+++ b/libc/arch-arm/denver/denver.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
arch-arm/denver/bionic/memcpy.S \
arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/generic/bionic/memchr.S b/libc/arch-arm/generic/bionic/memchr.S
new file mode 100644
index 0000000..cb00d82
--- /dev/null
+++ b/libc/arch-arm/generic/bionic/memchr.S
@@ -0,0 +1,155 @@
+/* Copyright (c) 2010-2015, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Linaro Limited nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ Written by Dave Gilbert <david.gilbert@linaro.org>
+
+ This memchr routine is optimised on a Cortex-A9 and should work on
+ all ARMv7 processors. It has a fast path for short sizes, and has
+ an optimised path for large data sets; the worst case is finding the
+ match early in a large data set.
+
+ */
+
+#include <private/bionic_asm.h>
+
+@ 2011-02-07 david.gilbert@linaro.org
+@ Extracted from local git a5b438d861
+@ 2011-07-14 david.gilbert@linaro.org
+@ Import endianness fix from local git ea786f1b
+@ 2011-12-07 david.gilbert@linaro.org
+@ Removed unneeded cbz from align loop
+
+ .syntax unified
+ .arch armv7-a
+
+@ this lets us check a flag in a 00/ff byte easily in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) 1<<(31-(c*8))
+#else
+#define CHARTSTMASK(c) 1<<(c*8)
+#endif
+ .text
+ .thumb
+
+@ ---------------------------------------------------------------------------
+ .thumb_func
+ENTRY(memchr)
+ .p2align 4,,15
+ @ r0 = start of memory to scan
+ @ r1 = character to look for
+ @ r2 = length
+ @ returns r0 = pointer to character or NULL if not found
+ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
+
+ cmp r2,#16 @ If it's short don't bother with anything clever
+ blt 20f
+
+ tst r0, #7 @ If it's already aligned skip the next bit
+ beq 10f
+
+ @ Work up to an aligned point
+5:
+ ldrb r3, [r0],#1
+ subs r2, r2, #1
+ cmp r3, r1
+ beq 50f @ If it matches exit found
+ tst r0, #7
+ bne 5b @ If not aligned yet then do next byte
+
+10:
+ @ At this point, we are aligned, we know we have at least 8 bytes to work with
+ push {r4,r5,r6,r7}
+ orr r1, r1, r1, lsl #8 @ expand the match byte across the whole word
+ orr r1, r1, r1, lsl #16
+ bic r4, r2, #7 @ Byte count rounded down to whole double words
+ mvns r7, #0 @ all F's
+ movs r3, #0
+
+15:
+ ldrd r5,r6,[r0],#8
+ subs r4, r4, #8
+ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
+ eor r6,r6, r1
+ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r6, r5, r7 @ chained: bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ cbnz r6, 60f
+ bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
+
+ pop {r4,r5,r6,r7}
+ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
+ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
+
+20:
+ cbz r2, 40f @ zero length or already at the end: not found
+
+21: @ Post aligned section, or just a short call
+ ldrb r3,[r0],#1
+ subs r2,r2,#1
+ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
+ cbz r3, 50f
+ bne 21b @ on r2 flags
+
+40:
+ movs r0,#0 @ not found
+ bx lr
+
+50:
+ subs r0,r0,#1 @ found
+ bx lr
+
+60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
+ @ r0 points to the start of the double word after the one that was tested
+ @ r5 has the 00/ff pattern for the first word, r6 has the chained value
+ cmp r5, #0
+ itte eq
+ moveq r5, r6 @ the match is in the 2nd word
+ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
+ subne r0,r0,#7 @ or 2nd byte of 1st word
+
+ @ r0 currently points to the 3rd byte of the word containing the hit
+ tst r5, # CHARTSTMASK(0) @ 1st character
+ bne 61f
+ adds r0,r0,#1
+ tst r5, # CHARTSTMASK(1) @ 2nd character
+ ittt eq
+ addeq r0,r0,#1
+ tsteq r5, # (3<<15) @ 2nd & 3rd character
+ @ If not the 3rd must be the last one
+ addeq r0,r0,#1
+
+61:
+ pop {r4,r5,r6,r7}
+ subs r0,r0,#1
+ bx lr
+END(memchr)
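The heart of this memchr's fast path is the uadd8/sel pair: after XORing each loaded
word with the byte-replicated target, uadd8 of 0xff per lane sets the GE flags for
every nonzero byte, and sel then builds a word that is 0xff exactly in the matching
lanes (hence the "NOTE INVERSION" comments). A portable C model using the subtraction
trick in place of uadd8/sel (names are ours):

    #include <stdint.h>

    // 0xff in every byte lane of w equal to the replicated target
    // c4 (c4 = c * 0x01010101), else 0x00. Lanes above the first match
    // may be spuriously 0xff here (borrow propagation) -- uadd8/sel is
    // exact per lane -- but the lowest set lane is correct either way,
    // which is all the search needs.
    static uint32_t match_mask(uint32_t w, uint32_t c4) {
        uint32_t x = w ^ c4;  // matching bytes become 0x00
        uint32_t m = (x - 0x01010101u) & ~x & 0x80808080u;
        m |= m >> 1; m |= m >> 2; m |= m >> 4;  // 0x80 -> 0xff per lane
        return m;
    }

    // Little-endian byte index of the first match within the word.
    static inline int first_match(uint32_t mask) {
        return __builtin_ctz(mask) >> 3;
    }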
diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S
index c78dbd4..6643d55 100644
--- a/libc/arch-arm/generic/bionic/memcmp.S
+++ b/libc/arch-arm/generic/bionic/memcmp.S
@@ -221,8 +221,7 @@ ENTRY(memcmp)
bne 8b
9: /* restore registers and return */
- ldmfd sp!, {r4, lr}
- bx lr
+ ldmfd sp!, {r4, pc}
10: /* process less than 12 bytes */
cmp r2, #0
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index ea5a399..65cba4c 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -194,8 +194,7 @@ ENTRY(memcpy)
/* we're done! restore everything and return */
1: ldmfd sp!, {r5-r11}
- ldmfd sp!, {r0, r4, lr}
- bx lr
+ ldmfd sp!, {r0, r4, pc}
/********************************************************************/
@@ -385,8 +384,7 @@ ENTRY(memcpy)
/* we're done! restore sp and spilled registers and return */
add sp, sp, #28
- ldmfd sp!, {r0, r4, lr}
- bx lr
+ ldmfd sp!, {r0, r4, pc}
END(memcpy)
// Only reached when the __memcpy_chk check fails.
diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S
index d17a9c4..b8eabbf 100644
--- a/libc/arch-arm/generic/bionic/memset.S
+++ b/libc/arch-arm/generic/bionic/memset.S
@@ -82,8 +82,7 @@ ENTRY(memset)
strbcs r1, [r0], #1
strbmi r1, [r0], #1
subs r2, r2, r3
- popls {r0, r4-r7, lr} /* return */
- bxls lr
+ popls {r0, r4-r7, pc} /* return */
/* align the destination to a cache-line */
mov r12, r1
@@ -126,8 +125,7 @@ ENTRY(memset)
strhmi r1, [r0], #2
movs r2, r2, lsl #2
strbcs r1, [r0]
- ldmfd sp!, {r0, r4-r7, lr}
- bx lr
+ ldmfd sp!, {r0, r4-r7, pc}
END(memset)
.data
diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk
index e49d6d2..016c882 100644
--- a/libc/arch-arm/generic/generic.mk
+++ b/libc/arch-arm/generic/generic.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
arch-arm/generic/bionic/memcpy.S \
arch-arm/generic/bionic/memset.S \
diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S
index 246f159..1a39c5b 100644
--- a/libc/arch-arm/krait/bionic/__strcat_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcat_chk.S
@@ -40,7 +40,7 @@
ENTRY(__strcat_chk)
pld [r0, #0]
push {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
@@ -177,7 +177,7 @@ ENTRY(__strcat_chk)
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
- bhi __strcat_chk_failed
+ bhi .L_strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
@@ -185,20 +185,17 @@ ENTRY(__strcat_chk)
mov r2, r4
add r0, r0, r3
pop {r4, r5}
-END(__strcat_chk)
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
-#define MEMCPY_BASE __strcat_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__strcat_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
+ // Undo the above cfi directives.
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
-
+.L_strcat_chk_failed:
ldr r0, error_message
ldr r1, error_code
1:
@@ -208,7 +205,7 @@ error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__strcat_chk_failed)
+END(__strcat_chk)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S
index db76686..00202f3 100644
--- a/libc/arch-arm/krait/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S
@@ -39,7 +39,7 @@
ENTRY(__strcpy_chk)
pld [r0, #0]
push {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
@@ -149,21 +149,14 @@ ENTRY(__strcpy_chk)
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
- bhs __strcpy_chk_failed
+ bhs .L_strcpy_chk_failed
// Add 1 for copy length to get the string terminator.
add r2, r3, #1
-END(__strcpy_chk)
-#define MEMCPY_BASE __strcpy_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__strcpy_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
+.L_strcpy_chk_failed:
ldr r0, error_message
ldr r1, error_code
1:
@@ -173,7 +166,7 @@ error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__strcpy_chk_failed)
+END(__strcpy_chk)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S
index 9ff46a8..5d27b57 100644
--- a/libc/arch-arm/krait/bionic/memcpy.S
+++ b/libc/arch-arm/krait/bionic/memcpy.S
@@ -45,7 +45,7 @@
ENTRY(__memcpy_chk)
cmp r2, r3
- bhi __memcpy_chk_fail
+ bhi .L_memcpy_chk_fail
// Fall through to memcpy...
END(__memcpy_chk)
@@ -53,19 +53,20 @@ END(__memcpy_chk)
ENTRY(memcpy)
pld [r1, #64]
stmfd sp!, {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
-END(memcpy)
-#define MEMCPY_BASE __memcpy_base
-#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__memcpy_chk_fail)
+ // Undo the cfi directives from above.
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r0
+ .cfi_restore lr
+.L_memcpy_chk_fail:
// Preserve lr for backtrace.
push {lr}
- .cfi_def_cfa_offset 4
+ .cfi_adjust_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
@@ -77,7 +78,7 @@ error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__memcpy_chk_fail)
+END(memcpy)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 035dcf1..76c5a84 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -1,123 +1,191 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-
-/*
- * This code assumes it is running on a processor that supports all arm v7
- * instructions, that supports neon instructions, and that has a 32 byte
- * cache line.
- */
-
-// Assumes neon instructions and a cache line size of 32 bytes.
-
-ENTRY_PRIVATE(MEMCPY_BASE)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- /* do we have at least 16-bytes to copy (needed for alignment below) */
- cmp r2, #16
- blo 5f
-
- /* align destination to cache-line for the write-buffer */
- rsb r3, r0, #0
- ands r3, r3, #0xF
- beq 2f
-
- /* copy up to 15-bytes (count in r3) */
- sub r2, r2, r3
- movs ip, r3, lsl #31
- itt mi
- ldrbmi lr, [r1], #1
- strbmi lr, [r0], #1
- itttt cs
- ldrbcs ip, [r1], #1
- ldrbcs lr, [r1], #1
- strbcs ip, [r0], #1
- strbcs lr, [r0], #1
- movs ip, r3, lsl #29
- bge 1f
- // copies 4 bytes, destination 32-bits aligned
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
-1: bcc 2f
- // copies 8 bytes, destination 64-bits aligned
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0, :64]!
-
-2: /* make sure we have at least 64 bytes to copy */
- subs r2, r2, #64
- blo 2f
-
-1: /* The main loop copies 64 bytes at a time */
- vld1.8 {d0 - d3}, [r1]!
- vld1.8 {d4 - d7}, [r1]!
- pld [r1, #(32*8)]
- subs r2, r2, #64
- vst1.8 {d0 - d3}, [r0, :128]!
- vst1.8 {d4 - d7}, [r0, :128]!
- bhs 1b
-
-2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
- adds r2, r2, #32
- blo 4f
-
- /* Copy 32 bytes. These cache lines were already preloaded */
- vld1.8 {d0 - d3}, [r1]!
- sub r2, r2, #32
- vst1.8 {d0 - d3}, [r0, :128]!
-
-4: /* less than 32 left */
- add r2, r2, #32
- tst r2, #0x10
- beq 5f
- // copies 16 bytes, 128-bits aligned
- vld1.8 {d0, d1}, [r1]!
- vst1.8 {d0, d1}, [r0, :128]!
-
-5: /* copy up to 15-bytes (count in r2) */
- movs ip, r2, lsl #29
- bcc 1f
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0]!
-1: bge 2f
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
-2: movs ip, r2, lsl #31
- itt mi
- ldrbmi r3, [r1], #1
- strbmi r3, [r0], #1
- itttt cs
- ldrbcs ip, [r1], #1
- ldrbcs lr, [r1], #1
- strbcs ip, [r0], #1
- strbcs lr, [r0], #1
-
- ldmfd sp!, {r0, lr}
- bx lr
-END(MEMCPY_BASE)
+/***************************************************************************
+ Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of The Linux Foundation nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/* Assumes neon instructions and a cache line size of 64 bytes. */
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+#define PLDOFFS (10)
+#define PLDTHRESH (PLDOFFS)
+#define BBTHRESH (4096/64)
+#define PLDSIZE (64)
+
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+
+ .text
+ .fpu neon
+
+.L_memcpy_base:
+ cmp r2, #4
+ blt .L_neon_lt4
+ cmp r2, #16
+ blt .L_neon_lt16
+ cmp r2, #32
+ blt .L_neon_16
+ cmp r2, #64
+ blt .L_neon_copy_32_a
+
+ mov r12, r2, lsr #6
+ cmp r12, #PLDTHRESH
+ ble .L_neon_copy_64_loop_nopld
+
+ push {r9, r10}
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r9, 0
+ .cfi_rel_offset r10, 4
+
+ cmp r12, #BBTHRESH
+ ble .L_neon_prime_pump
+
+ add lr, r0, #0x400
+ add r9, r1, #(PLDOFFS*PLDSIZE)
+ sub lr, lr, r9
+ lsl lr, lr, #21
+ lsr lr, lr, #21
+ add lr, lr, #(PLDOFFS*PLDSIZE)
+ cmp r12, lr, lsr #6
+ ble .L_neon_prime_pump
+
+ itt gt
+ movgt r9, #(PLDOFFS)
+ rsbsgt r9, r9, lr, lsr #6
+ ble .L_neon_prime_pump
+
+ add r10, r1, lr
+ bic r10, #0x3F
+
+ sub r12, r12, lr, lsr #6
+
+ cmp r9, r12
+ itee le
+ suble r12, r12, r9
+ movgt r9, r12
+ movgt r12, #0
+
+ pld [r1, #((PLDOFFS-1)*PLDSIZE)]
+.L_neon_copy_64_loop_outer_doublepld:
+ pld [r1, #((PLDOFFS)*PLDSIZE)]
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r9, r9, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_outer_doublepld
+ cmp r12, #0
+ beq .L_neon_pop_before_nopld
+
+ cmp r12, #(512*1024/64)
+ blt .L_neon_copy_64_loop_outer
+
+.L_neon_copy_64_loop_ddr:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ pld [r10]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_ddr
+ b .L_neon_pop_before_nopld
+
+.L_neon_prime_pump:
+ mov lr, #(PLDOFFS*PLDSIZE)
+ add r10, r1, #(PLDOFFS*PLDSIZE)
+ bic r10, #0x3F
+ sub r12, r12, #PLDOFFS
+ ldr r3, [r10, #(-1*PLDSIZE)]
+
+.L_neon_copy_64_loop_outer:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_outer
+
+.L_neon_pop_before_nopld:
+ mov r12, lr, lsr #6
+ pop {r9, r10}
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r9
+ .cfi_restore r10
+
+.L_neon_copy_64_loop_nopld:
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ bne .L_neon_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ beq .L_neon_exit
+
+.L_neon_copy_32_a:
+ movs r3, r2, lsl #27
+ bcc .L_neon_16
+ vld1.32 {q0,q1}, [r1]!
+ vst1.32 {q0,q1}, [r0]!
+
+.L_neon_16:
+ bpl .L_neon_lt16
+ vld1.32 {q8}, [r1]!
+ vst1.32 {q8}, [r0]!
+ ands r2, r2, #0x0f
+ beq .L_neon_exit
+
+.L_neon_lt16:
+ movs r3, r2, lsl #29
+ bcc 1f
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0]!
+1:
+ bge .L_neon_lt4
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
+
+.L_neon_lt4:
+ movs r2, r2, lsl #31
+ itt cs
+ ldrhcs r3, [r1], #2
+ strhcs r3, [r0], #2
+ itt mi
+ ldrbmi r3, [r1]
+ strbmi r3, [r0]
+
+.L_neon_exit:
+ pop {r0, pc}
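The PLDOFFS/PLDTHRESH machinery in this file keeps prefetch a fixed number of 64-byte
lines ahead of the load pointer: the doublepld/prime-pump preamble issues the first
PLDOFFS line requests, the outer loops then touch one line per iteration through the
dummy "ldr r3, [r10]", and the very large (DDR-bound) loop switches to plain pld. The
steady state reduces to something like this C sketch (hypothetical helper; the real
loop body is the vld1/vst1 pairs):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    enum { PLDOFFS = 10, PLDSIZE = 64 };

    // Stream 64-byte blocks while touching the cache line
    // PLDOFFS * PLDSIZE bytes ahead, as the unrolled loops do.
    static void copy_blocks(uint8_t* dst, const uint8_t* src, size_t nblocks) {
        for (size_t i = 0; i != nblocks; ++i) {
            __builtin_prefetch(src + PLDOFFS * PLDSIZE);
            memcpy(dst, src, PLDSIZE);
            dst += PLDSIZE;
            src += PLDSIZE;
        }
    }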
diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S
new file mode 100644
index 0000000..aea7315
--- /dev/null
+++ b/libc/arch-arm/krait/bionic/memmove.S
@@ -0,0 +1,219 @@
+/***************************************************************************
+ Copyright (c) 2009-2014 The Linux Foundation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of The Linux Foundation nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/***************************************************************************
+ * Neon memmove: Attempts to do a memmove with Neon registers if possible.
+ * Inputs:
+ * dest: The destination buffer
+ * src: The source buffer
+ * n: The size of the buffer to transfer
+ * Outputs:
+ *    dest: returned unchanged (memmove returns its first argument)
+ ***************************************************************************/
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+/*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_KRAIT_PLD_SET := true
+ * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize>
+ * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (10)
+#endif
+#ifndef PLDTHRESH
+#define PLDTHRESH (PLDOFFS)
+#endif
+#if (PLDOFFS < 5)
+#error Routine does not support offsets less than 5
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (64)
+#endif
+
+ .text
+ .syntax unified
+ .fpu neon
+ .thumb
+ .thumb_func
+
+//ENTRY(bcopy)
+// //.cfi_startproc
+// mov r12, r0
+// mov r0, r1
+// mov r1, r12
+// // Fall through to memmove
+// //.cfi_endproc
+//END(bcopy)
+
+ENTRY(memmove)
+_memmove_words:
+ //.cfi_startproc
+ .save {r0, lr}
+ cmp r2, #0
+ it ne
+ subsne r12, r0, r1 // Warning: do not combine these "it" blocks
+ it eq
+ bxeq lr
+// memmove only if r1 < r0 < r1+r2
+ cmp r0, r1
+ itt ge
+ addge r12, r1, r2
+ cmpge r12, r0
+ it le
+ ble memcpy
+ cmp r2, #4
+ it le
+ ble .Lneon_b2f_smallcopy_loop
+ push {r0, lr}
+ add r0, r0, r2
+ add r1, r1, r2
+ cmp r2, #64
+ it ge
+ bge .Lneon_b2f_copy_64
+ cmp r2, #32
+ it ge
+ bge .Lneon_b2f_copy_32
+ cmp r2, #8
+ it ge
+ bge .Lneon_b2f_copy_8
+ b .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
+ mov r12, r2, lsr #6
+ add r0, r0, #32
+ add r1, r1, #32
+ cmp r12, #PLDTHRESH
+ it le
+ ble .Lneon_b2f_copy_64_loop_nopld
+ sub r12, #PLDOFFS
+ sub lr, r1, #(PLDOFFS)*PLDSIZE
+.Lneon_b2f_copy_64_loop_outer:
+ pld [lr]
+ sub r1, r1, #96
+ sub r0, r0, #96
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]
+ sub lr, lr, #64
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_64_loop_outer
+ mov r12, #PLDOFFS
+.Lneon_b2f_copy_64_loop_nopld:
+ sub r1, r1, #96
+ sub r0, r0, #96
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ it eq
+ beq .Lneon_memmove_done
+ sub r1, r1, #32
+ sub r0, r0, #32
+ cmp r2, #32
+ it lt
+ blt .Lneon_b2f_copy_8
+.Lneon_b2f_copy_32:
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.32 {q0, q1}, [r1]
+ vst1.32 {q0, q1}, [r0]
+ ands r2, r2, #0x1f
+ it eq
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_8:
+ movs r12, r2, lsr #0x3
+ it eq
+ beq .Lneon_b2f_copy_1
+.Lneon_b2f_copy_8_loop:
+ sub r1, r1, #8
+ sub r0, r0, #8
+ vld1.32 {d0}, [r1]
+ subs r12, r12, #1
+ vst1.32 {d0}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_8_loop
+ ands r2, r2, #0x7
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_1:
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
+.Lneon_memmove_done:
+ pop {r0, pc}
+.Lneon_b2f_smallcopy_loop:
+ // 4 bytes or less
+ add r1, r1, r2
+ add r0, r0, r2
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
+ bx lr
+// .cfi_endproc
+END(memmove)
+
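The guard near the top of ENTRY(memmove) ("memmove only if r1 < r0 < r1+r2") takes the back-to-front path only when the destination lands inside the source buffer; every other layout is safe to hand to the plain forward memcpy. A rough C restatement (sketch with an invented name, not the actual routine):

    #include <stddef.h>
    #include <string.h>

    void *memmove_sketch(void *dst, const void *src, size_t n) {
        char *d = dst;
        const char *s = (const char *)src;
        if (n == 0 || d == (char *)s)
            return dst;
        if (d < s || d >= s + n)      /* no destructive overlap */
            return memcpy(dst, src, n);
        while (n--)                   /* s < d < s + n: copy backwards */
            d[n] = s[n];
        return dst;
    }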
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index a4fbe17..ae05965 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -69,10 +69,7 @@ END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
- stmfd sp!, {r0}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset r0, 0
-
+ mov r3, r0
vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */
@@ -82,7 +79,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
- vst1.8 {d0 - d3}, [r0]!
+ vst1.8 {d0 - d3}, [r3]!
bhs 1b
2: /* less than 32 left */
@@ -91,18 +88,17 @@ ENTRY(memset)
beq 3f
// writes 16 bytes, 128-bits aligned
- vst1.8 {d0, d1}, [r0]!
+ vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
- vst1.8 {d0}, [r0]!
+ vst1.8 {d0}, [r3]!
1: bge 2f
- vst1.32 {d0[0]}, [r0]!
+ vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31
- strbmi r1, [r0], #1
- strbcs r1, [r0], #1
- strbcs r1, [r0], #1
- ldmfd sp!, {r0}
+ strbmi r1, [r3], #1
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
bx lr
END(memset)
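The memset change above is purely about how the return value is preserved: instead of spilling r0 to the stack and reloading it at exit, the patched code copies the destination into r3 and advances r3, so r0 reaches the final `bx lr` untouched and two memory operations disappear. In C terms (a sketch, not the actual routine):

    #include <stddef.h>

    void *memset_sketch(void *dst, int c, size_t n) {
        unsigned char *p = dst;        /* plays the role of r3 */
        while (n--)
            *p++ = (unsigned char)c;
        return dst;                    /* "r0" is never clobbered */
    }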
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 88b4d66..5f5b414 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -1,9 +1,19 @@
libc_bionic_src_files_arm += \
- arch-arm/krait/bionic/memcpy.S \
arch-arm/krait/bionic/memset.S \
arch-arm/krait/bionic/strcmp.S \
arch-arm/krait/bionic/__strcat_chk.S \
arch-arm/krait/bionic/__strcpy_chk.S \
+ arch-arm/krait/bionic/memmove.S
+
+# For some targets we don't need this optimization; the corresponding
+# flag is defined in the device-specific folder.
+ifeq ($(TARGET_CPU_MEMCPY_BASE_OPT_DISABLE),true)
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S
+else
+libc_bionic_src_files_arm += \
+ arch-arm/krait/bionic/memcpy.S
+endif
# Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove
libc_bionic_src_files_arm += \
@@ -13,7 +23,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
-libc_bionic_src_files_arm += \
- arch-arm/denver/bionic/memmove.S \
+
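Taken together with the tunables documented in memmove.S, a device opts in and tunes these routines from its BoardConfig.mk. An illustrative fragment (the numeric values shown are just the built-in defaults, not part of this patch):

    # device/<vendor>/<board>/BoardConfig.mk
    TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
    TARGET_USE_KRAIT_PLD_SET := true
    TARGET_KRAIT_BIONIC_PLDOFFS := 10
    TARGET_KRAIT_BIONIC_PLDSIZE := 64
    TARGET_KRAIT_BIONIC_PLDTHRESH := 10
    # Opt out of the krait memcpy and use the cortex-a15 one instead:
    # TARGET_CPU_MEMCPY_BASE_OPT_DISABLE := true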
diff --git a/libc/arch-arm/scorpion/scorpion.mk b/libc/arch-arm/scorpion/scorpion.mk
new file mode 100644
index 0000000..ce18a7e
--- /dev/null
+++ b/libc/arch-arm/scorpion/scorpion.mk
@@ -0,0 +1,18 @@
+# Use krait versions of memset/strcmp/memmove
+libc_bionic_src_files_arm += \
+ arch-arm/krait/bionic/memset.S \
+ arch-arm/krait/bionic/strcmp.S \
+ arch-arm/krait/bionic/memmove.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S
diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index 470a038..1b8d534 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -8,7 +8,6 @@ libc_bionic_src_files_arm64 += \
bionic/__memset_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/__strcat_chk.cpp \
- bionic/strrchr.cpp \
libc_freebsd_src_files_arm64 += \
upstream-freebsd/lib/libc/string/wcscat.c \
diff --git a/libc/arch-arm64/denver64/bionic/memmove.S b/libc/arch-arm64/denver64/bionic/memmove.S
new file mode 100644
index 0000000..739ce49
--- /dev/null
+++ b/libc/arch-arm64/denver64/bionic/memmove.S
@@ -0,0 +1,329 @@
+/* Copyright (c) 2014, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Linaro nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ * wchar_t is 4 bytes
+ */
+
+#include <private/bionic_asm.h>
+
+/* Parameters and result. */
+#define dstin x0
+#define src x1
+#define count x2
+#define tmp1 x3
+#define tmp1w w3
+#define tmp2 x4
+#define tmp2w w4
+#define tmp3 x5
+#define tmp3w w5
+#define dst x6
+
+#define A_l x7
+#define A_h x8
+#define B_l x9
+#define B_h x10
+#define C_l x11
+#define C_h x12
+#define D_l x13
+#define D_h x14
+
+#if defined(WMEMMOVE)
+ENTRY(wmemmove)
+ lsl count, count, #2
+#else
+ENTRY(memmove)
+#endif
+ cmp dstin, src
+ b.lo .Ldownwards
+ add tmp1, src, count
+ cmp dstin, tmp1
+ b.hs memcpy /* No overlap. */
+
+ /* Upwards move with potential overlap.
+ * Need to move from the tail backwards. SRC and DST point one
+ * byte beyond the remaining data to move. */
+ add dst, dstin, count
+ add src, src, count
+ cmp count, #64
+ b.ge .Lmov_not_short_up
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+.Ltail63up:
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15up
+ sub dst, dst, tmp1
+ sub src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #32]
+ stp A_l, A_h, [dst, #32]
+1:
+ ldp A_l, A_h, [src, #16]
+ stp A_l, A_h, [dst, #16]
+2:
+ ldp A_l, A_h, [src]
+ stp A_l, A_h, [dst]
+.Ltail15up:
+ /* Move up to 15 bytes of data. Does not assume additional data
+ * being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src, #-1]
+ strb tmp1w, [dst, #-1]
+1:
+ ret
+
+.Lmov_not_short_up:
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ ands tmp2, src, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src, #-1]!
+ strb tmp1w, [dst, #-1]!
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le .Ltail63up
+2:
+ subs count, count, #128
+ b.ge .Lmov_body_large_up
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src, #-64]!
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst, #-64]!
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ b.ne .Ltail63up
+ ret
+
+ /* Critical loop. Start at a new Icache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+.Lmov_body_large_up:
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ ldp D_l, D_h, [src, #-64]!
+1:
+ stp A_l, A_h, [dst, #-16]
+ ldp A_l, A_h, [src, #-16]
+ stp B_l, B_h, [dst, #-32]
+ ldp B_l, B_h, [src, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp C_l, C_h, [src, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ ldp D_l, D_h, [src, #-64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #-16]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ tst count, #0x3f
+ b.ne .Ltail63up
+ ret
+
+
+.Ldownwards:
+ /* For a downwards move we can safely use memcpy provided that
+ * DST is more than 16 bytes away from SRC. */
+ sub tmp1, src, #16
+ cmp dstin, tmp1
+ b.ls memcpy /* May overlap, but not critically. */
+
+ mov dst, dstin /* Preserve DSTIN for return value. */
+ cmp count, #64
+ b.ge .Lmov_not_short_down
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+.Ltail63down:
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15down
+ add dst, dst, tmp1
+ add src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-48]
+ stp A_l, A_h, [dst, #-48]
+1:
+ ldp A_l, A_h, [src, #-32]
+ stp A_l, A_h, [dst, #-32]
+2:
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+.Ltail15down:
+ /* Move up to 15 bytes of data. Does not assume additional data
+ being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+1:
+ ret
+
+.Lmov_not_short_down:
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ neg tmp2, src
+ ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src], #1
+ strb tmp1w, [dst], #1
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le .Ltail63down
+2:
+ subs count, count, #128
+ b.ge .Lmov_body_large_down
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst]
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ add src, src, #64
+ add dst, dst, #64
+ b.ne .Ltail63down
+ ret
+
+ /* Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+.Lmov_body_large_down:
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #0]
+ sub dst, dst, #16 /* Pre-bias. */
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
+1:
+ stp A_l, A_h, [dst, #16]
+ ldp A_l, A_h, [src, #16]
+ stp B_l, B_h, [dst, #32]
+ ldp B_l, B_h, [src, #32]
+ stp C_l, C_h, [dst, #48]
+ ldp C_l, C_h, [src, #48]
+ stp D_l, D_h, [dst, #64]!
+ ldp D_l, D_h, [src, #64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #16]
+ stp B_l, B_h, [dst, #32]
+ stp C_l, C_h, [dst, #48]
+ stp D_l, D_h, [dst, #64]
+ add src, src, #16
+ add dst, dst, #64 + 16
+ tst count, #0x3f
+ b.ne .Ltail63down
+ ret
+#if defined(WMEMMOVE)
+END(wmemmove)
+#else
+END(memmove)
+#endif
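Both critical loops in this file share the same software-pipelining shape: four 16-byte pairs are loaded before the loop, and each iteration stores the pairs from the previous iteration while loading the next four, hiding load latency behind the stores. A C sketch of the pattern (illustrative; it assumes the pair count is a multiple of 4 and at least 4, which the surrounding code guarantees by peeling the tail separately):

    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uint64_t lo, hi; } pair16;   /* one ldp/stp */

    static void pipelined_copy(pair16 *dst, const pair16 *src, size_t pairs) {
        pair16 a = src[0], b = src[1], c = src[2], d = src[3];
        src += 4;
        while (pairs > 4) {
            dst[0] = a; a = src[0];    /* store iteration i, load i+1 */
            dst[1] = b; b = src[1];
            dst[2] = c; c = src[2];
            dst[3] = d; d = src[3];
            dst += 4; src += 4; pairs -= 4;
        }
        dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = d;
    }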
diff --git a/libc/arch-arm64/denver64/denver64.mk b/libc/arch-arm64/denver64/denver64.mk
index d619c11..6b1e1ff 100644
--- a/libc/arch-arm64/denver64/denver64.mk
+++ b/libc/arch-arm64/denver64/denver64.mk
@@ -2,7 +2,7 @@ libc_bionic_src_files_arm64 += \
arch-arm64/generic/bionic/memchr.S \
arch-arm64/generic/bionic/memcmp.S \
arch-arm64/denver64/bionic/memcpy.S \
- arch-arm64/generic/bionic/memmove.S \
+ arch-arm64/denver64/bionic/memmove.S \
arch-arm64/denver64/bionic/memset.S \
arch-arm64/generic/bionic/stpcpy.S \
arch-arm64/generic/bionic/strchr.S \
@@ -11,4 +11,5 @@ libc_bionic_src_files_arm64 += \
arch-arm64/generic/bionic/strlen.S \
arch-arm64/generic/bionic/strncmp.S \
arch-arm64/generic/bionic/strnlen.S \
+ arch-arm64/generic/bionic/strrchr.S \
arch-arm64/generic/bionic/wmemmove.S
diff --git a/libc/arch-arm64/generic/bionic/memcpy_base.S b/libc/arch-arm64/generic/bionic/memcpy_base.S
index c5d42ce..f850624 100644
--- a/libc/arch-arm64/generic/bionic/memcpy_base.S
+++ b/libc/arch-arm64/generic/bionic/memcpy_base.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Linaro Limited
+/* Copyright (c) 2012-2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -22,158 +22,196 @@
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
/* Assumptions:
*
- * ARMv8-a, AArch64
- * Unaligned accesses
+ * ARMv8-a, AArch64, unaligned accesses.
*
*/
+#include <private/bionic_asm.h>
+
#define dstin x0
#define src x1
#define count x2
-#define tmp1 x3
-#define tmp1w w3
-#define tmp2 x4
-#define tmp2w w4
-#define tmp3 x5
-#define tmp3w w5
-#define dst x6
-
-#define A_l x7
-#define A_h x8
-#define B_l x9
-#define B_h x10
-#define C_l x11
-#define C_h x12
-#define D_l x13
-#define D_h x14
-
- mov dst, dstin
- cmp count, #64
- b.ge .Lcpy_not_short
- cmp count, #15
- b.le .Ltail15tiny
-
- /* Deal with small copies quickly by dropping straight into the
- * exit block. */
-.Ltail63:
- /* Copy up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate. */
- ands tmp1, count, #0x30
- b.eq .Ltail15
- add dst, dst, tmp1
- add src, src, tmp1
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp A_l, A_h, [src, #-48]
- stp A_l, A_h, [dst, #-48]
-1:
- ldp A_l, A_h, [src, #-32]
- stp A_l, A_h, [dst, #-32]
-2:
- ldp A_l, A_h, [src, #-16]
- stp A_l, A_h, [dst, #-16]
-
-.Ltail15:
- ands count, count, #15
- beq 1f
- add src, src, count
- ldp A_l, A_h, [src, #-16]
- add dst, dst, count
- stp A_l, A_h, [dst, #-16]
+#define dst x3
+#define srcend x4
+#define dstend x5
+#define A_l x6
+#define A_lw w6
+#define A_h x7
+#define A_hw w7
+#define B_l x8
+#define B_lw w8
+#define B_h x9
+#define C_l x10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l src
+#define E_h count
+#define F_l srcend
+#define F_h dst
+#define tmp1 x9
+
+#define L(l) .L ## l
+
+/* Copies are split into 3 main cases: small copies of up to 16 bytes,
+   medium copies of 17..96 bytes, which are fully unrolled, and large
+   copies of more than 96 bytes, which align the destination and use an
+   unrolled loop processing 64 bytes per iteration.
+ Small and medium copies read all data before writing, allowing any
+ kind of overlap, and memmove tailcalls memcpy for these cases as
+ well as non-overlapping copies.
+*/
+
+ prfm PLDL1KEEP, [src]
+ add srcend, src, count
+ add dstend, dstin, count
+ cmp count, 16
+ b.ls L(copy16)
+ cmp count, 96
+ b.hi L(copy_long)
+
+ /* Medium copies: 17..96 bytes. */
+ sub tmp1, count, 1
+ ldp A_l, A_h, [src]
+ tbnz tmp1, 6, L(copy96)
+ ldp D_l, D_h, [srcend, -16]
+ tbz tmp1, 5, 1f
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [srcend, -32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstend, -32]
1:
+ stp A_l, A_h, [dstin]
+ stp D_l, D_h, [dstend, -16]
ret
-.Ltail15tiny:
- /* Copy up to 15 bytes of data. Does not assume additional data
- being copied. */
- tbz count, #3, 1f
- ldr tmp1, [src], #8
- str tmp1, [dst], #8
-1:
- tbz count, #2, 1f
- ldr tmp1w, [src], #4
- str tmp1w, [dst], #4
-1:
- tbz count, #1, 1f
- ldrh tmp1w, [src], #2
- strh tmp1w, [dst], #2
-1:
- tbz count, #0, 1f
- ldrb tmp1w, [src]
- strb tmp1w, [dst]
+ .p2align 4
+
+ /* Small copies: 0..16 bytes. */
+L(copy16):
+ cmp count, 8
+ b.lo 1f
+ ldr A_l, [src]
+ ldr A_h, [srcend, -8]
+ str A_l, [dstin]
+ str A_h, [dstend, -8]
+ ret
+ .p2align 4
1:
+ tbz count, 2, 1f
+ ldr A_lw, [src]
+ ldr A_hw, [srcend, -4]
+ str A_lw, [dstin]
+ str A_hw, [dstend, -4]
ret
-.Lcpy_not_short:
- /* We don't much care about the alignment of DST, but we want SRC
- * to be 128-bit (16 byte) aligned so that we don't cross cache line
- * boundaries on both loads and stores. */
- neg tmp2, src
- ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
- b.eq 2f
- sub count, count, tmp2
- /* Copy more data than needed; it's faster than jumping
- * around copying sub-Quadword quantities. We know that
- * it can't overrun. */
- ldp A_l, A_h, [src]
- add src, src, tmp2
- stp A_l, A_h, [dst]
- add dst, dst, tmp2
- /* There may be less than 63 bytes to go now. */
- cmp count, #63
- b.le .Ltail63
-2:
- subs count, count, #128
- b.ge .Lcpy_body_large
- /* Less than 128 bytes to copy, so handle 64 here and then jump
- * to the tail. */
- ldp A_l, A_h, [src]
- ldp B_l, B_h, [src, #16]
- ldp C_l, C_h, [src, #32]
- ldp D_l, D_h, [src, #48]
- stp A_l, A_h, [dst]
- stp B_l, B_h, [dst, #16]
- stp C_l, C_h, [dst, #32]
- stp D_l, D_h, [dst, #48]
- tst count, #0x3f
- add src, src, #64
- add dst, dst, #64
- b.ne .Ltail63
+ /* Copy 0..3 bytes. Use a branchless sequence that copies the same
+ byte 3 times if count==1, or the 2nd byte twice if count==2. */
+1:
+ cbz count, 2f
+ lsr tmp1, count, 1
+ ldrb A_lw, [src]
+ ldrb A_hw, [srcend, -1]
+ ldrb B_lw, [src, tmp1]
+ strb A_lw, [dstin]
+ strb B_lw, [dstin, tmp1]
+ strb A_hw, [dstend, -1]
+2: ret
+
+ .p2align 4
+ /* Copy 64..96 bytes. Copy 64 bytes from the start and
+ 32 bytes from the end. */
+L(copy96):
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [src, 32]
+ ldp D_l, D_h, [src, 48]
+ ldp E_l, E_h, [srcend, -32]
+ ldp F_l, F_h, [srcend, -16]
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin, 32]
+ stp D_l, D_h, [dstin, 48]
+ stp E_l, E_h, [dstend, -32]
+ stp F_l, F_h, [dstend, -16]
ret
- /* Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line. */
- .p2align 6
-.Lcpy_body_large:
- /* There are at least 128 bytes to copy. */
- ldp A_l, A_h, [src, #0]
- sub dst, dst, #16 /* Pre-bias. */
- ldp B_l, B_h, [src, #16]
- ldp C_l, C_h, [src, #32]
- ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
+ /* Align DST to 16 byte alignment so that we don't cross cache line
+ boundaries on both loads and stores. There are at least 96 bytes
+ to copy, so copy 16 bytes unaligned and then align. The loop
+ copies 64 bytes per iteration and prefetches one iteration ahead. */
+
+ .p2align 4
+L(copy_long):
+ and tmp1, dstin, 15
+ bic dst, dstin, 15
+ ldp D_l, D_h, [src]
+ sub src, src, tmp1
+ add count, count, tmp1 /* Count is now 16 too large. */
+ ldp A_l, A_h, [src, 16]
+ stp D_l, D_h, [dstin]
+ ldp B_l, B_h, [src, 32]
+ ldp C_l, C_h, [src, 48]
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 128 + 16 /* Test and readjust count. */
+ b.ls 2f
1:
- stp A_l, A_h, [dst, #16]
- ldp A_l, A_h, [src, #16]
- stp B_l, B_h, [dst, #32]
- ldp B_l, B_h, [src, #32]
- stp C_l, C_h, [dst, #48]
- ldp C_l, C_h, [src, #48]
- stp D_l, D_h, [dst, #64]!
- ldp D_l, D_h, [src, #64]!
- subs count, count, #64
- b.ge 1b
- stp A_l, A_h, [dst, #16]
- stp B_l, B_h, [dst, #32]
- stp C_l, C_h, [dst, #48]
- stp D_l, D_h, [dst, #64]
- add src, src, #16
- add dst, dst, #64 + 16
- tst count, #0x3f
- b.ne .Ltail63
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [src, 16]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [src, 32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [src, 48]
+ stp D_l, D_h, [dst, 64]!
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 64
+ b.hi 1b
+
+ /* Write the last full set of 64 bytes. The remainder is at most 64
+ bytes, so it is safe to always copy 64 bytes from the end even if
+ there is just 1 byte left. */
+2:
+ ldp E_l, E_h, [srcend, -64]
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [srcend, -48]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [srcend, -16]
+ stp D_l, D_h, [dst, 64]
+ stp E_l, E_h, [dstend, -64]
+ stp A_l, A_h, [dstend, -48]
+ stp B_l, B_h, [dstend, -32]
+ stp C_l, C_h, [dstend, -16]
ret
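The small and medium paths above deliberately issue every load before the first store, and address both ends of the buffer (src and srcend); this makes them overlap-safe and lets one code path serve a whole range of sizes. The copy16 case, for instance, covers any 8..16-byte copy with exactly two loads and two stores, as in this C sketch (illustrative helper, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Overlap-tolerant copy for 8 <= n <= 16: load both halves
       before storing either; the two accesses overlap in the middle
       for n < 16, which is harmless. */
    static void copy8_16(char *dst, const char *src, size_t n) {
        uint64_t a, b;
        memcpy(&a, src, 8);              /* first 8 bytes */
        memcpy(&b, src + n - 8, 8);      /* last 8 bytes  */
        memcpy(dst, &a, 8);
        memcpy(dst + n - 8, &b, 8);
    }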
diff --git a/libc/arch-arm64/generic/bionic/memmove.S b/libc/arch-arm64/generic/bionic/memmove.S
index 8b366a3..c50112d 100644
--- a/libc/arch-arm64/generic/bionic/memmove.S
+++ b/libc/arch-arm64/generic/bionic/memmove.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2014, Linaro Limited
+/* Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -22,319 +22,131 @@
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
/* Assumptions:
*
- * ARMv8-a, AArch64
- * Unaligned accesses
- * wchar_t is 4 bytes
+ * ARMv8-a, AArch64, unaligned accesses, wchar_t is 4 bytes
*/
#include <private/bionic_asm.h>
/* Parameters and result. */
-#ifdef BCOPY
-#define origdstin x1
-#define origsrc x0
-#endif
#define dstin x0
#define src x1
#define count x2
-#define tmp1 x3
-#define tmp1w w3
-#define tmp2 x4
-#define tmp2w w4
-#define tmp3 x5
-#define tmp3w w5
-#define dst x6
-
-#define A_l x7
-#define A_h x8
-#define B_l x9
-#define B_h x10
-#define C_l x11
-#define C_h x12
-#define D_l x13
-#define D_h x14
+#define srcend x3
+#define dstend x4
+#define tmp1 x5
+#define A_l x6
+#define A_h x7
+#define B_l x8
+#define B_h x9
+#define C_l x10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l count
+#define E_h tmp1
+
+/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
+ Larger backwards copies are also handled by memcpy. The only remaining
+ case is forward large copies. The destination is aligned, and an
+ unrolled loop processes 64 bytes per iteration.
+*/
-#ifdef BCOPY
-ENTRY(bcopy)
- /* Swap src and dst so that a branch to memcpy doesn't cause issues. */
- mov tmp1, origsrc
- mov origsrc, origdstin
- mov origdstin, tmp1
-#elif defined(WMEMMOVE)
+#if defined(WMEMMOVE)
ENTRY(wmemmove)
lsl count, count, #2
#else
ENTRY(memmove)
#endif
- cmp dstin, src
- b.lo .Ldownwards
- add tmp1, src, count
- cmp dstin, tmp1
- b.hs memcpy /* No overlap. */
-
- /* Upwards move with potential overlap.
- * Need to move from the tail backwards. SRC and DST point one
- * byte beyond the remaining data to move. */
- add dst, dstin, count
- add src, src, count
- cmp count, #64
- b.ge .Lmov_not_short_up
-
- /* Deal with small moves quickly by dropping straight into the
- * exit block. */
-.Ltail63up:
- /* Move up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate. */
- ands tmp1, count, #0x30
- b.eq .Ltail15up
- sub dst, dst, tmp1
- sub src, src, tmp1
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp A_l, A_h, [src, #32]
- stp A_l, A_h, [dst, #32]
-1:
- ldp A_l, A_h, [src, #16]
- stp A_l, A_h, [dst, #16]
-2:
- ldp A_l, A_h, [src]
- stp A_l, A_h, [dst]
-.Ltail15up:
- /* Move up to 15 bytes of data. Does not assume additional data
- * being moved. */
- tbz count, #3, 1f
- ldr tmp1, [src, #-8]!
- str tmp1, [dst, #-8]!
-1:
- tbz count, #2, 1f
- ldr tmp1w, [src, #-4]!
- str tmp1w, [dst, #-4]!
-1:
- tbz count, #1, 1f
- ldrh tmp1w, [src, #-2]!
- strh tmp1w, [dst, #-2]!
-1:
- tbz count, #0, 1f
- ldrb tmp1w, [src, #-1]
- strb tmp1w, [dst, #-1]
-1:
- ret
-
-.Lmov_not_short_up:
- /* We don't much care about the alignment of DST, but we want SRC
- * to be 128-bit (16 byte) aligned so that we don't cross cache line
- * boundaries on both loads and stores. */
- ands tmp2, src, #15 /* Bytes to reach alignment. */
- b.eq 2f
- sub count, count, tmp2
- /* Move enough data to reach alignment; unlike memcpy, we have to
- * be aware of the overlap, which means we can't move data twice. */
- tbz tmp2, #3, 1f
- ldr tmp1, [src, #-8]!
- str tmp1, [dst, #-8]!
-1:
- tbz tmp2, #2, 1f
- ldr tmp1w, [src, #-4]!
- str tmp1w, [dst, #-4]!
-1:
- tbz tmp2, #1, 1f
- ldrh tmp1w, [src, #-2]!
- strh tmp1w, [dst, #-2]!
-1:
- tbz tmp2, #0, 1f
- ldrb tmp1w, [src, #-1]!
- strb tmp1w, [dst, #-1]!
-1:
-
- /* There may be less than 63 bytes to go now. */
- cmp count, #63
- b.le .Ltail63up
+ sub tmp1, dstin, src
+ cmp count, 96
+ ccmp tmp1, count, 2, hi
+ b.hs memcpy
+
+ cbz tmp1, 3f
+ add dstend, dstin, count
+ add srcend, src, count
+
+ /* Align dstend to 16 byte alignment so that we don't cross cache line
+ boundaries on both loads and stores. There are at least 96 bytes
+ to copy, so copy 16 bytes unaligned and then align. The loop
+ copies 64 bytes per iteration and prefetches one iteration ahead. */
+
+ and tmp1, dstend, 15
+ ldp D_l, D_h, [srcend, -16]
+ sub srcend, srcend, tmp1
+ sub count, count, tmp1
+ ldp A_l, A_h, [srcend, -16]
+ stp D_l, D_h, [dstend, -16]
+ ldp B_l, B_h, [srcend, -32]
+ ldp C_l, C_h, [srcend, -48]
+ ldp D_l, D_h, [srcend, -64]!
+ sub dstend, dstend, tmp1
+ subs count, count, 128
+ b.ls 2f
+ nop
+1:
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [srcend, -16]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [srcend, -48]
+ stp D_l, D_h, [dstend, -64]!
+ ldp D_l, D_h, [srcend, -64]!
+ subs count, count, 64
+ b.hi 1b
+
+ /* Write the last full set of 64 bytes. The remainder is at most 64
+ bytes, so it is safe to always copy 64 bytes from the start even if
+ there is just 1 byte left. */
2:
- subs count, count, #128
- b.ge .Lmov_body_large_up
- /* Less than 128 bytes to move, so handle 64 here and then jump
- * to the tail. */
- ldp A_l, A_h, [src, #-64]!
- ldp B_l, B_h, [src, #16]
- ldp C_l, C_h, [src, #32]
- ldp D_l, D_h, [src, #48]
- stp A_l, A_h, [dst, #-64]!
- stp B_l, B_h, [dst, #16]
- stp C_l, C_h, [dst, #32]
- stp D_l, D_h, [dst, #48]
- tst count, #0x3f
- b.ne .Ltail63up
- ret
-
- /* Critical loop. Start at a new Icache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line. */
- .p2align 6
-.Lmov_body_large_up:
- /* There are at least 128 bytes to move. */
- ldp A_l, A_h, [src, #-16]
- ldp B_l, B_h, [src, #-32]
- ldp C_l, C_h, [src, #-48]
- ldp D_l, D_h, [src, #-64]!
-1:
- stp A_l, A_h, [dst, #-16]
- ldp A_l, A_h, [src, #-16]
- stp B_l, B_h, [dst, #-32]
- ldp B_l, B_h, [src, #-32]
- stp C_l, C_h, [dst, #-48]
- ldp C_l, C_h, [src, #-48]
- stp D_l, D_h, [dst, #-64]!
- ldp D_l, D_h, [src, #-64]!
- subs count, count, #64
- b.ge 1b
- stp A_l, A_h, [dst, #-16]
- stp B_l, B_h, [dst, #-32]
- stp C_l, C_h, [dst, #-48]
- stp D_l, D_h, [dst, #-64]!
- tst count, #0x3f
- b.ne .Ltail63up
- ret
-
-
-.Ldownwards:
- /* For a downwards move we can safely use memcpy provided that
- * DST is more than 16 bytes away from SRC. */
- sub tmp1, src, #16
- cmp dstin, tmp1
- b.ls memcpy /* May overlap, but not critically. */
-
- mov dst, dstin /* Preserve DSTIN for return value. */
- cmp count, #64
- b.ge .Lmov_not_short_down
-
- /* Deal with small moves quickly by dropping straight into the
- * exit block. */
-.Ltail63down:
- /* Move up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate. */
- ands tmp1, count, #0x30
- b.eq .Ltail15down
- add dst, dst, tmp1
- add src, src, tmp1
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp A_l, A_h, [src, #-48]
- stp A_l, A_h, [dst, #-48]
-1:
- ldp A_l, A_h, [src, #-32]
- stp A_l, A_h, [dst, #-32]
-2:
- ldp A_l, A_h, [src, #-16]
- stp A_l, A_h, [dst, #-16]
-.Ltail15down:
- /* Move up to 15 bytes of data. Does not assume additional data
- being moved. */
- tbz count, #3, 1f
- ldr tmp1, [src], #8
- str tmp1, [dst], #8
-1:
- tbz count, #2, 1f
- ldr tmp1w, [src], #4
- str tmp1w, [dst], #4
-1:
- tbz count, #1, 1f
- ldrh tmp1w, [src], #2
- strh tmp1w, [dst], #2
-1:
- tbz count, #0, 1f
- ldrb tmp1w, [src]
- strb tmp1w, [dst]
-1:
- ret
-
-.Lmov_not_short_down:
- /* We don't much care about the alignment of DST, but we want SRC
- * to be 128-bit (16 byte) aligned so that we don't cross cache line
- * boundaries on both loads and stores. */
- neg tmp2, src
- ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
- b.eq 2f
- sub count, count, tmp2
- /* Move enough data to reach alignment; unlike memcpy, we have to
- * be aware of the overlap, which means we can't move data twice. */
- tbz tmp2, #3, 1f
- ldr tmp1, [src], #8
- str tmp1, [dst], #8
-1:
- tbz tmp2, #2, 1f
- ldr tmp1w, [src], #4
- str tmp1w, [dst], #4
-1:
- tbz tmp2, #1, 1f
- ldrh tmp1w, [src], #2
- strh tmp1w, [dst], #2
-1:
- tbz tmp2, #0, 1f
- ldrb tmp1w, [src], #1
- strb tmp1w, [dst], #1
-1:
-
- /* There may be less than 63 bytes to go now. */
- cmp count, #63
- b.le .Ltail63down
-2:
- subs count, count, #128
- b.ge .Lmov_body_large_down
- /* Less than 128 bytes to move, so handle 64 here and then jump
- * to the tail. */
- ldp A_l, A_h, [src]
- ldp B_l, B_h, [src, #16]
- ldp C_l, C_h, [src, #32]
- ldp D_l, D_h, [src, #48]
- stp A_l, A_h, [dst]
- stp B_l, B_h, [dst, #16]
- stp C_l, C_h, [dst, #32]
- stp D_l, D_h, [dst, #48]
- tst count, #0x3f
- add src, src, #64
- add dst, dst, #64
- b.ne .Ltail63down
- ret
-
- /* Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line. */
- .p2align 6
-.Lmov_body_large_down:
- /* There are at least 128 bytes to move. */
- ldp A_l, A_h, [src, #0]
- sub dst, dst, #16 /* Pre-bias. */
- ldp B_l, B_h, [src, #16]
- ldp C_l, C_h, [src, #32]
- ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
-1:
- stp A_l, A_h, [dst, #16]
- ldp A_l, A_h, [src, #16]
- stp B_l, B_h, [dst, #32]
- ldp B_l, B_h, [src, #32]
- stp C_l, C_h, [dst, #48]
- ldp C_l, C_h, [src, #48]
- stp D_l, D_h, [dst, #64]!
- ldp D_l, D_h, [src, #64]!
- subs count, count, #64
- b.ge 1b
- stp A_l, A_h, [dst, #16]
- stp B_l, B_h, [dst, #32]
- stp C_l, C_h, [dst, #48]
- stp D_l, D_h, [dst, #64]
- add src, src, #16
- add dst, dst, #64 + 16
- tst count, #0x3f
- b.ne .Ltail63down
- ret
-#ifdef BCOPY
-END(bcopy)
-#elif defined(WMEMMOVE)
+ ldp E_l, E_h, [src, 48]
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [src, 32]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [src, 16]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [src]
+ stp D_l, D_h, [dstend, -64]
+ stp E_l, E_h, [dstin, 48]
+ stp A_l, A_h, [dstin, 32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin]
+3: ret
+
+#if defined(WMEMMOVE)
END(wmemmove)
#else
END(memmove)
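The new entry sequence folds the whole dispatch into one unsigned comparison: the sub/ccmp/b.hs triple sends the copy to memcpy whenever count <= 96, or whenever (dst - src) treated as unsigned is at least count, which covers both "no overlap" and "dst behind src" in a single test. A C restatement (sketch only, with a hypothetical name; the real backward path copies 64 bytes at a time, a byte loop is shown for clarity):

    #include <stdint.h>
    #include <string.h>

    void *memmove_entry_sketch(void *dst, const void *src, size_t n) {
        uintptr_t diff = (uintptr_t)dst - (uintptr_t)src;
        if (n <= 96 || diff >= n)      /* memcpy tolerates these overlaps */
            return memcpy(dst, src, n);
        /* dst is ahead of src by less than n: copy from the tail down. */
        char *d = dst;
        const char *s = (const char *)src;
        while (n--)
            d[n] = s[n];
        return dst;
    }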
diff --git a/libc/arch-arm64/generic/bionic/memset.S b/libc/arch-arm64/generic/bionic/memset.S
index 7c204b4..4b3b17b 100644
--- a/libc/arch-arm64/generic/bionic/memset.S
+++ b/libc/arch-arm64/generic/bionic/memset.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Linaro Limited
+/* Copyright (c) 2012-2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -22,226 +22,207 @@
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
/* Assumptions:
*
- * ARMv8-a, AArch64
- * Unaligned accesses
+ * ARMv8-a, AArch64, unaligned accesses
*
*/
#include <private/bionic_asm.h>
-/* By default we assume that the DC instruction can be used to zero
- data blocks more efficiently. In some circumstances this might be
- unsafe, for example in an asymmetric multiprocessor environment with
- different DC clear lengths (neither the upper nor lower lengths are
- safe to use).
-
- If code may be run in a virtualized environment, then define
- MAYBE_VIRT. This will cause the code to cache the system register
- values rather than re-reading them each call. */
-
-#define dstin x0
-#ifdef BZERO
-#define count x1
-#else
-#define count x2
-#endif
-#define val w1
-#define tmp1 x3
-#define tmp1w w3
-#define tmp2 x4
-#define tmp2w w4
-#define zva_len_x x5
-#define zva_len w5
-#define zva_bits_x x6
-
-#define A_l x7
-#define A_lw w7
-#define dst x8
-#define tmp3w w9
-
-#ifdef BZERO
-ENTRY(bzero)
-#else
+#define dstin x0
+#define val x1
+#define valw w1
+#define count x2
+#define dst x3
+#define dstend x4
+#define tmp1 x5
+#define tmp1w w5
+#define tmp2 x6
+#define tmp2w w6
+#define zva_len x7
+#define zva_lenw w7
+
+#define L(l) .L ## l
+
ENTRY(memset)
-#endif
-
- mov dst, dstin /* Preserve return value. */
-#ifdef BZERO
- b .Lzero_mem
-#endif
- ands A_lw, val, #255
- b.eq .Lzero_mem
- orr A_lw, A_lw, A_lw, lsl #8
- orr A_lw, A_lw, A_lw, lsl #16
- orr A_l, A_l, A_l, lsl #32
-.Ltail_maybe_long:
- cmp count, #64
- b.ge .Lnot_short
-.Ltail_maybe_tiny:
- cmp count, #15
- b.le .Ltail15tiny
-.Ltail63:
- ands tmp1, count, #0x30
- b.eq .Ltail15
- add dst, dst, tmp1
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- stp A_l, A_l, [dst, #-48]
-1:
- stp A_l, A_l, [dst, #-32]
-2:
- stp A_l, A_l, [dst, #-16]
-
-.Ltail15:
- and count, count, #15
- add dst, dst, count
- stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
- ret
-.Ltail15tiny:
- /* Set up to 15 bytes. Does not assume earlier memory
- being set. */
- tbz count, #3, 1f
- str A_l, [dst], #8
-1:
- tbz count, #2, 1f
- str A_lw, [dst], #4
-1:
- tbz count, #1, 1f
- strh A_lw, [dst], #2
-1:
- tbz count, #0, 1f
- strb A_lw, [dst]
-1:
+ dup v0.16B, valw
+ add dstend, dstin, count
+
+ cmp count, 96
+ b.hi L(set_long)
+ cmp count, 16
+ b.hs L(set_medium)
+ mov val, v0.D[0]
+
+ /* Set 0..15 bytes. */
+ tbz count, 3, 1f
+ str val, [dstin]
+ str val, [dstend, -8]
+ ret
+ nop
+1: tbz count, 2, 2f
+ str valw, [dstin]
+ str valw, [dstend, -4]
+ ret
+2: cbz count, 3f
+ strb valw, [dstin]
+ tbz count, 1, 3f
+ strh valw, [dstend, -2]
+3: ret
+
+ /* Set 17..96 bytes. */
+L(set_medium):
+ str q0, [dstin]
+ tbnz count, 6, L(set96)
+ str q0, [dstend, -16]
+ tbz count, 5, 1f
+ str q0, [dstin, 16]
+ str q0, [dstend, -32]
+1: ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ str q0, [dstin, 16]
+ stp q0, q0, [dstin, 32]
+ stp q0, q0, [dstend, -32]
ret
- /* Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line, this ensures the entire loop is in one line. */
- .p2align 6
-.Lnot_short:
- neg tmp2, dst
- ands tmp2, tmp2, #15
- b.eq 2f
- /* Bring DST to 128-bit (16-byte) alignment. We know that there's
- * more than that to set, so we simply store 16 bytes and advance by
- * the amount required to reach alignment. */
- sub count, count, tmp2
- stp A_l, A_l, [dst]
- add dst, dst, tmp2
- /* There may be less than 63 bytes to go now. */
- cmp count, #63
- b.le .Ltail63
-2:
- sub dst, dst, #16 /* Pre-bias. */
- sub count, count, #64
-1:
- stp A_l, A_l, [dst, #16]
- stp A_l, A_l, [dst, #32]
- stp A_l, A_l, [dst, #48]
- stp A_l, A_l, [dst, #64]!
- subs count, count, #64
- b.ge 1b
- tst count, #0x3f
- add dst, dst, #16
- b.ne .Ltail63
+ .p2align 3
+ nop
+L(set_long):
+ and valw, valw, 255
+ bic dst, dstin, 15
+ str q0, [dstin]
+ cmp count, 256
+ ccmp valw, 0, 0, cs
+ b.eq L(try_zva)
+L(no_zva):
+ sub count, dstend, dst /* Count is 16 too large. */
+ add dst, dst, 16
+ sub count, count, 64 + 16 /* Adjust count and bias for loop. */
+1: stp q0, q0, [dst], 64
+ stp q0, q0, [dst, -32]
+L(tail64):
+ subs count, count, 64
+ b.hi 1b
+2: stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
ret
- /* For zeroing memory, check to see if we can use the ZVA feature to
- * zero entire 'cache' lines. */
-.Lzero_mem:
- mov A_l, #0
- cmp count, #63
- b.le .Ltail_maybe_tiny
- neg tmp2, dst
- ands tmp2, tmp2, #15
- b.eq 1f
- sub count, count, tmp2
- stp A_l, A_l, [dst]
- add dst, dst, tmp2
- cmp count, #63
- b.le .Ltail63
-1:
- /* For zeroing small amounts of memory, it's not worth setting up
- * the line-clear code. */
- cmp count, #128
- b.lt .Lnot_short
-#ifdef MAYBE_VIRT
- /* For efficiency when virtualized, we cache the ZVA capability. */
- adrp tmp2, .Lcache_clear
- ldr zva_len, [tmp2, #:lo12:.Lcache_clear]
- tbnz zva_len, #31, .Lnot_short
- cbnz zva_len, .Lzero_by_line
+ .p2align 3
+L(try_zva):
mrs tmp1, dczid_el0
- tbz tmp1, #4, 1f
- /* ZVA not available. Remember this for next time. */
- mov zva_len, #~0
- str zva_len, [tmp2, #:lo12:.Lcache_clear]
- b .Lnot_short
-1:
- mov tmp3w, #4
- and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
- lsl zva_len, tmp3w, zva_len
- str zva_len, [tmp2, #:lo12:.Lcache_clear]
-#else
- mrs tmp1, dczid_el0
- tbnz tmp1, #4, .Lnot_short
- mov tmp3w, #4
- and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
- lsl zva_len, tmp3w, zva_len
-#endif
-
-.Lzero_by_line:
- /* Compute how far we need to go to become suitably aligned. We're
- * already at quad-word alignment. */
- cmp count, zva_len_x
- b.lt .Lnot_short /* Not enough to reach alignment. */
- sub zva_bits_x, zva_len_x, #1
- neg tmp2, dst
- ands tmp2, tmp2, zva_bits_x
- b.eq 1f /* Already aligned. */
- /* Not aligned, check that there's enough to copy after alignment. */
- sub tmp1, count, tmp2
- cmp tmp1, #64
- ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
- b.lt .Lnot_short
- /* We know that there's at least 64 bytes to zero and that it's safe
- * to overrun by 64 bytes. */
- mov count, tmp1
-2:
- stp A_l, A_l, [dst]
- stp A_l, A_l, [dst, #16]
- stp A_l, A_l, [dst, #32]
- subs tmp2, tmp2, #64
- stp A_l, A_l, [dst, #48]
- add dst, dst, #64
- b.ge 2b
- /* We've overrun a bit, so adjust dst downwards. */
- add dst, dst, tmp2
-1:
- sub count, count, zva_len_x
-3:
- dc zva, dst
- add dst, dst, zva_len_x
- subs count, count, zva_len_x
- b.ge 3b
- ands count, count, zva_bits_x
- b.ne .Ltail_maybe_long
+ tbnz tmp1w, 4, L(no_zva)
+ and tmp1w, tmp1w, 15
+ cmp tmp1w, 4 /* ZVA size is 64 bytes. */
+ b.ne L(zva_128)
+
+	/* Write the first and last 64-byte aligned blocks using stp rather
+	   than DC ZVA; this is faster on some cores.  */
+L(zva_64):
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ bic dst, dst, 63
+ stp q0, q0, [dst, 64]
+ stp q0, q0, [dst, 96]
+ sub count, dstend, dst /* Count is now 128 too large. */
+ sub count, count, 128+64+64 /* Adjust count and bias for loop. */
+ add dst, dst, 128
+ nop
+1: dc zva, dst
+ add dst, dst, 64
+ subs count, count, 64
+ b.hi 1b
+ stp q0, q0, [dst, 0]
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
ret
-#ifdef BZERO
-END(bzero)
-#else
+
+ .p2align 3
+L(zva_128):
+ cmp tmp1w, 5 /* ZVA size is 128 bytes. */
+ b.ne L(zva_other)
+
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]
+ stp q0, q0, [dst, 96]
+ bic dst, dst, 127
+ sub count, dstend, dst /* Count is now 128 too large. */
+ sub count, count, 128+128 /* Adjust count and bias for loop. */
+ add dst, dst, 128
+1: dc zva, dst
+ add dst, dst, 128
+ subs count, count, 128
+ b.hi 1b
+ stp q0, q0, [dstend, -128]
+ stp q0, q0, [dstend, -96]
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+
+L(zva_other):
+ mov tmp2w, 4
+ lsl zva_lenw, tmp2w, tmp1w
+ add tmp1, zva_len, 64 /* Max alignment bytes written. */
+ cmp count, tmp1
+ blo L(no_zva)
+
+ sub tmp2, zva_len, 1
+ add tmp1, dst, zva_len
+ add dst, dst, 16
+ subs count, tmp1, dst /* Actual alignment bytes to write. */
+ bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
+ beq 2f
+1: stp q0, q0, [dst], 64
+ stp q0, q0, [dst, -32]
+ subs count, count, 64
+ b.hi 1b
+2: mov dst, tmp1
+ sub count, dstend, tmp1 /* Remaining bytes to write. */
+ subs count, count, zva_len
+ b.lo 4f
+3: dc zva, dst
+ add dst, dst, zva_len
+ subs count, count, zva_len
+ b.hs 3b
+4: add count, count, zva_len
+ b L(tail64)
+
END(memset)
-#endif
-
-#ifdef MAYBE_VIRT
- .bss
- .p2align 2
-.Lcache_clear:
- .space 4
-#endif
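L(try_zva) now reads dczid_el0 on each large zeroing call instead of caching it (the MAYBE_VIRT .bss cache is gone). Bit 4 of that register means DC ZVA is prohibited, and the low four bits encode log2 of the block size in 4-byte words, which is why the code compares the field against 4 (64 bytes) and 5 (128 bytes). A C sketch of the probe (assumes GCC-style inline asm on AArch64; the helper name is invented):

    #include <stdint.h>

    static uint64_t dc_zva_block_bytes(void) {
        uint64_t dczid;
        __asm__ volatile("mrs %0, dczid_el0" : "=r"(dczid));
        if (dczid & (1u << 4))
            return 0;                    /* DC ZVA not permitted */
        return 4u << (dczid & 15);       /* 4 bytes/word << log2(words) */
    }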
diff --git a/libc/arch-arm64/generic/bionic/strlen.S b/libc/arch-arm64/generic/bionic/strlen.S
index 3bd9809..6e540fc 100644
--- a/libc/arch-arm64/generic/bionic/strlen.S
+++ b/libc/arch-arm64/generic/bionic/strlen.S
@@ -1,16 +1,16 @@
-/* Copyright (c) 2014, Linaro Limited
+/* Copyright (c) 2013-2015, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
+ notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
* Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -22,16 +22,19 @@
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* Assumptions:
*
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
*/
#include <private/bionic_asm.h>
+/* To test the page crossing code path more thoroughly, compile with
+ -DTEST_PAGE_CROSS - this will force all calls through the slower
+ entry path. This option is not intended for production use. */
+
/* Arguments and results. */
#define srcin x0
#define len x0
@@ -40,87 +43,185 @@
#define src x1
#define data1 x2
#define data2 x3
-#define data2a x4
-#define has_nul1 x5
-#define has_nul2 x6
-#define tmp1 x7
-#define tmp2 x8
-#define tmp3 x9
-#define tmp4 x10
-#define zeroones x11
-#define pos x12
+#define has_nul1 x4
+#define has_nul2 x5
+#define tmp1 x4
+#define tmp2 x5
+#define tmp3 x6
+#define tmp4 x7
+#define zeroones x8
+
+#define L(l) .L ## l
+
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. A faster check
+ (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+ false hits for characters 129..255. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
- /* Start of critial section -- keep to one 64Byte cache line. */
+#ifdef TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 15
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
+ /* Since strings are short on average, we check the first 16 bytes
+ of the string for a NUL character. In order to do an unaligned ldp
+ safely we have to do a page cross check first. If there is a NUL
+ byte we calculate the length from the 2 8-byte words using
+ conditional select to reduce branch mispredictions (it is unlikely
+ strlen will be repeatedly called on strings with the same length).
+
+   If the string is longer than 16 bytes, we align src so we don't need
+   further page cross checks, and process 32 bytes per iteration
+   using the fast NUL check.  If we encounter non-ASCII characters,
+   we fall back to a second loop using the full NUL check.
+
+ If the page cross check fails, we read 16 bytes from an aligned
+ address, remove any characters before the string, and continue
+ in the main loop using aligned loads. Since strings crossing a
+ page in the first 16 bytes are rare (probability of
+ 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+ AArch64 systems have a minimum page size of 4k. We don't bother
+ checking for larger page sizes - the cost of setting up the correct
+ page size is just not worth the extra gain from a small reduction in
+ the cases taking the slow path. Note that we only care about
+ whether the first fetch, which may be misaligned, crosses a page
+ boundary. */
+
ENTRY(strlen)
- mov zeroones, #REP8_01
- bic src, srcin, #15
- ands tmp1, srcin, #15
- b.ne .Lmisaligned
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
- /* The inner loop deals with two Dwords at a time. This has a
- slightly higher start-up cost, but we should win quite quickly,
- especially on cores with a high number of issue slots per
- cycle, as we get much better parallelism out of the operations. */
-.Lloop:
- ldp data1, data2, [src], #16
-.Lrealigned:
+ and tmp1, srcin, MIN_PAGE_SIZE - 1
+ mov zeroones, REP8_01
+ cmp tmp1, MIN_PAGE_SIZE - 16
+ b.gt L(page_cross)
+ ldp data1, data2, [srcin]
+#ifdef __AARCH64EB__
+ /* For big-endian, carry propagation (if the final byte in the
+ string is 0x01) means we cannot use has_nul1/2 directly.
+ Since we expect strings to be small and early-exit,
+      byte-swap the data now so has_nul1/2 will be correct.  */
+ rev data1, data1
+ rev data2, data2
+#endif
sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
+ orr tmp2, data1, REP8_7f
sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lloop
- /* End of critical section -- keep to one 64Byte cache line. */
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(main_loop_entry)
- sub len, src, srcin
- cbz has_nul1, .Lnul_in_data2
-#ifdef __AARCH64EB__
- mov data2, data1
-#endif
- sub len, len, #8
- mov has_nul2, has_nul1
-.Lnul_in_data2:
+ /* Enter with C = has_nul1 == 0. */
+ csel has_nul1, has_nul1, has_nul2, cc
+ mov len, 8
+ rev has_nul1, has_nul1
+ clz tmp1, has_nul1
+ csel len, xzr, len, cc
+ add len, len, tmp1, lsr 3
+ ret
+
+ /* The inner loop processes 32 bytes per iteration and uses the fast
+ NUL check. If we encounter non-ASCII characters, use a second
+ loop with the accurate NUL check. */
+ .p2align 4
+L(main_loop_entry):
+ bic src, srcin, 15
+ sub src, src, 16
+L(main_loop):
+ ldp data1, data2, [src, 32]!
+.Lpage_cross_entry:
+ sub tmp1, data1, zeroones
+ sub tmp3, data2, zeroones
+ orr tmp2, tmp1, tmp3
+ tst tmp2, zeroones, lsl 7
+ bne 1f
+ ldp data1, data2, [src, 16]
+ sub tmp1, data1, zeroones
+ sub tmp3, data2, zeroones
+ orr tmp2, tmp1, tmp3
+ tst tmp2, zeroones, lsl 7
+ beq L(main_loop)
+ add src, src, 16
+1:
+ /* The fast check failed, so do the slower, accurate NUL check. */
+ orr tmp2, data1, REP8_7f
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(nonascii_loop)
+
+ /* Enter with C = has_nul1 == 0. */
+L(tail):
#ifdef __AARCH64EB__
/* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul directly. The
+ string is 0x01) means we cannot use has_nul1/2 directly. The
easiest way to get the correct byte is to byte-swap the data
and calculate the syndrome a second time. */
- rev data2, data2
- sub tmp1, data2, zeroones
- orr tmp2, data2, #REP8_7f
- bic has_nul2, tmp1, tmp2
+ csel data1, data1, data2, cc
+ rev data1, data1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ bic has_nul1, tmp1, tmp2
+#else
+ csel has_nul1, has_nul1, has_nul2, cc
#endif
- sub len, len, #8
- rev has_nul2, has_nul2
- clz pos, has_nul2
- add len, len, pos, lsr #3 /* Bits to bytes. */
+ sub len, src, srcin
+ rev has_nul1, has_nul1
+ add tmp2, len, 8
+ clz tmp1, has_nul1
+ csel len, len, tmp2, cc
+ add len, len, tmp1, lsr 3
ret
-.Lmisaligned:
- cmp tmp1, #8
- neg tmp1, tmp1
- ldp data1, data2, [src], #16
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- mov tmp2, #~0
+L(nonascii_loop):
+ ldp data1, data2, [src, 16]!
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ bne L(tail)
+ ldp data1, data2, [src, 16]!
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(nonascii_loop)
+ b L(tail)
+
+ /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+ srcin to 0x7f, so we ignore any NUL bytes before the string.
+ Then continue in the aligned loop. */
+L(page_cross):
+ bic src, srcin, 15
+ ldp data1, data2, [src]
+ lsl tmp1, srcin, 3
+ mov tmp4, -1
#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+ /* Big-endian. Early bytes are at MSB. */
+ lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
#else
/* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+ lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
#endif
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
- csinv data1, data1, xzr, le
- csel data2, data2, data2a, le
- b .Lrealigned
+ orr tmp1, tmp1, REP8_80
+ orn data1, data1, tmp1
+ orn tmp2, data2, tmp1
+ tst srcin, 8
+ csel data1, data1, tmp4, eq
+ csel data2, data2, tmp2, eq
+ b L(page_cross_entry)
END(strlen)
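The masking done in L(page_cross) above can be pictured in C for a single little-endian 8-byte word; again an illustrative sketch with invented names, not the patch's code:

#include <stdint.h>

/* Load 8 bytes from the aligned address at or below s, then OR 0x7f
   into every byte that precedes s itself.  A pre-string byte becomes
   0x7f or 0xff, never NUL, so the length scan cannot stop early. */
static uint64_t load_masked_le(const char* s) {
  const uint64_t* aligned = (const uint64_t*)((uintptr_t)s & ~(uintptr_t)7);
  uint64_t data = *aligned;
  uint64_t ignore = ~(~0ULL << (((uintptr_t)s & 7) * 8));  /* pre-string bytes */
  return data | (ignore & 0x7f7f7f7f7f7f7f7fULL);
}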
diff --git a/libc/arch-arm64/generic/bionic/strrchr.S b/libc/arch-arm64/generic/bionic/strrchr.S
new file mode 100644
index 0000000..46b5031
--- /dev/null
+++ b/libc/arch-arm64/generic/bionic/strrchr.S
@@ -0,0 +1,171 @@
+/*
+ strrchr - find last instance of a character in a string
+
+ Copyright (c) 2014, ARM Limited
+ All rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the company nor the names of its contributors
+ may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+#include <private/bionic_asm.h>
+
+/* Arguments and results. */
+#define srcin x0
+#define chrin w1
+
+#define result x0
+
+/* Locals and temporaries.  */
+#define src x2
+#define tmp1 x3
+#define wtmp2 w4
+#define tmp3 x5
+#define src_match x6
+#define src_offset x7
+#define const_m1 x8
+#define tmp4 x9
+#define nul_match x10
+#define chr_match x11
+
+#define vrepchr v0
+#define vdata1 v1
+#define vdata2 v2
+#define vhas_nul1 v3
+#define vhas_nul2 v4
+#define vhas_chr1 v5
+#define vhas_chr2 v6
+#define vrepmask_0 v7
+#define vrepmask_c v16
+#define vend1 v17
+#define vend2 v18
+
+/* Core algorithm.
+
+ For each 32-byte hunk we calculate a 64-bit syndrome value, with
+ two bits per byte (LSB is always in bits 0 and 1, for both big
+ and little-endian systems). For each tuple, bit 0 is set iff
+ the relevant byte matched the requested character; bit 1 is set
+ iff the relevant byte matched the NUL end of string (we trigger
+ off bit0 for the special case of looking for NUL). Since the bits
+ in the syndrome reflect exactly the order in which things occur
+ in the original string a count_trailing_zeros() operation will
+ identify exactly which byte is causing the termination, and why. */
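A rough C model of how the two syndromes are consumed in the tail (illustrative only; the NEON reduction that produces them is omitted):

#include <stdint.h>

/* nul_match/chr_match: two bits per byte, LSB-first, as described
   above.  Returns the offset within the 32-byte hunk of the last
   character match before the terminator, or -1 if there is none. */
static int last_match_offset(uint64_t nul_match, uint64_t chr_match) {
  /* Keep only match bits at or below the lowest NUL bit; this is
     the sub/eor/ands sequence in .Ltail. */
  uint64_t upto_nul = nul_match ? (nul_match ^ (nul_match - 1)) : ~0ULL;
  chr_match &= upto_nul;
  if (chr_match == 0)
    return -1;                  /* no match: strrchr returns NULL */
  /* Highest surviving bit, halved because each byte owns two bits. */
  return (63 - __builtin_clzll(chr_match)) / 2;
}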
+
+ENTRY(strrchr)
+ /* Magic constant 0x40100401 to allow us to identify which lane
+ matches the requested byte. Magic constant 0x80200802 used
+ similarly for NUL termination. */
+ mov wtmp2, #0x0401
+ movk wtmp2, #0x4010, lsl #16
+ dup vrepchr.16b, chrin
+ bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
+ dup vrepmask_c.4s, wtmp2
+ mov src_offset, #0
+ ands tmp1, srcin, #31
+ add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
+ b.eq .Laligned
+
+ /* Input string is not 32-byte aligned. Rather than forcing
+ the padding bytes to a safe value, we calculate the syndrome
+ for all the bytes, but then mask off those bits of the
+ syndrome that are related to the padding. */
+ ld1 {vdata1.16b, vdata2.16b}, [src], #32
+ neg tmp1, tmp1
+ cmeq vhas_nul1.16b, vdata1.16b, #0
+ cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
+ cmeq vhas_nul2.16b, vdata2.16b, #0
+ cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
+ and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+ and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+ and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+ and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+ addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
+ addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
+ addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
+ addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
+ mov nul_match, vhas_nul1.2d[0]
+ lsl tmp1, tmp1, #1
+ mov const_m1, #~0
+ mov chr_match, vhas_chr1.2d[0]
+ lsr tmp3, const_m1, tmp1
+
+ bic nul_match, nul_match, tmp3 // Mask padding bits.
+ bic chr_match, chr_match, tmp3 // Mask padding bits.
+ cbnz nul_match, .Ltail
+
+.Lloop:
+ cmp chr_match, #0
+ csel src_match, src, src_match, ne
+ csel src_offset, chr_match, src_offset, ne
+.Laligned:
+ ld1 {vdata1.16b, vdata2.16b}, [src], #32
+ cmeq vhas_nul1.16b, vdata1.16b, #0
+ cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
+ cmeq vhas_nul2.16b, vdata2.16b, #0
+ cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
+ addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
+ and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+ and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+ addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
+ addp vend1.16b, vend1.16b, vend1.16b // 128->64
+ addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
+ mov nul_match, vend1.2d[0]
+ mov chr_match, vhas_chr1.2d[0]
+ cbz nul_match, .Lloop
+
+ and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+ and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+ addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
+ addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
+ mov nul_match, vhas_nul1.2d[0]
+
+.Ltail:
+ /* Work out exactly where the string ends. */
+ sub tmp4, nul_match, #1
+ eor tmp4, tmp4, nul_match
+ ands chr_match, chr_match, tmp4
+ /* And pick the values corresponding to the last match. */
+ csel src_match, src, src_match, ne
+ csel src_offset, chr_match, src_offset, ne
+
+ /* Count down from the top of the syndrome to find the last match. */
+ clz tmp3, src_offset
+ /* Src_match points beyond the word containing the match, so we can
+ simply subtract half the bit-offset into the syndrome. Because
+ we are counting down, we need to go back one more character. */
+ add tmp3, tmp3, #2
+ sub result, src_match, tmp3, lsr #1
+ /* But if the syndrome shows no match was found, then return NULL. */
+ cmp src_offset, #0
+ csel result, result, xzr, ne
+
+ ret
+
+END(strrchr)
diff --git a/libc/arch-arm64/generic/generic.mk b/libc/arch-arm64/generic/generic.mk
index 1b595aa..4512dc5 100644
--- a/libc/arch-arm64/generic/generic.mk
+++ b/libc/arch-arm64/generic/generic.mk
@@ -11,4 +11,5 @@ libc_bionic_src_files_arm64 += \
arch-arm64/generic/bionic/strlen.S \
arch-arm64/generic/bionic/strncmp.S \
arch-arm64/generic/bionic/strnlen.S \
+ arch-arm64/generic/bionic/strrchr.S \
arch-arm64/generic/bionic/wmemmove.S
diff --git a/libc/arch-arm64/kryo/bionic/memcpy.S b/libc/arch-arm64/kryo/bionic/memcpy.S
new file mode 100644
index 0000000..87e1b3b
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Prototype: void *memcpy (void *dst, const void *src, size_t count).
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ENTRY(__memcpy_chk)
+ cmp x2, x3
+ b.hi __memcpy_chk_fail
+
+  // No overflow detected: branch to the real memcpy.
+  b memcpy
+END(__memcpy_chk)
+
+ .align 6
+ENTRY(memcpy)
+ #include "memcpy_base.S"
+END(memcpy)
+
+ENTRY_PRIVATE(__memcpy_chk_fail)
+ // Preserve for accurate backtrace.
+ stp x29, x30, [sp, -16]!
+ .cfi_def_cfa_offset 16
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+
+ adrp x0, error_string
+ add x0, x0, :lo12:error_string
+ ldr x1, error_code
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
+END(__memcpy_chk_fail)
+
+ .data
+ .align 2
+error_string:
+ .string "memcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm64/kryo/bionic/memcpy_base.S b/libc/arch-arm64/kryo/bionic/memcpy_base.S
new file mode 100644
index 0000000..0096bb7
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy_base.S
@@ -0,0 +1,244 @@
+/* Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef PLDOFFS
+#undef PLDOFFS
+#endif
+#define PLDOFFS (16)
+
+#ifdef PLDTHRESH
+#undef PLDTHRESH
+#endif
+#define PLDTHRESH (PLDOFFS)
+
+#ifdef BBTHRESH
+#undef BBTHRESH
+#endif
+#define BBTHRESH (2048/128)
+
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+
+#ifdef PLDSIZE
+#undef PLDSIZE
+#endif
+#define PLDSIZE (128)
+
+kryo_bb_memcpy:
+ mov x11, x0
+ cmp x2, #4
+ blo kryo_bb_lt4
+ cmp x2, #16
+ blo kryo_bb_lt16
+ cmp x2, #32
+ blo kryo_bb_16
+ cmp x2, #64
+ blo kryo_bb_copy_32_a
+ cmp x2, #128
+ blo kryo_bb_copy_64_a
+
+	// count is at least 128 here, so we can spare the up to 127 bytes needed to 128-byte align the source
+ neg x3, x1 // calculate count to get SOURCE aligned
+ ands x3, x3, #0x7F
+ b.eq kryo_bb_source_aligned // already aligned
+ // alignment fixup, small to large (favorable alignment)
+ tbz x3, #0, 1f
+ ldrb w5, [x1], #1
+ strb w5, [x0], #1
+1: tbz x3, #1, 2f
+ ldrh w6, [x1], #2
+ strh w6, [x0], #2
+2: tbz x3, #2, 3f
+ ldr w8, [x1], #4
+ str w8, [x0], #4
+3: tbz x3, #3, 4f
+ ldr x9, [x1], #8
+ str x9, [x0], #8
+4: tbz x3, #4, 5f
+ ldr q7, [x1], #16
+ str q7, [x0], #16
+5: tbz x3, #5, 55f
+ ldp q0, q1, [x1], #32
+ stp q0, q1, [x0], #32
+55: tbz x3, #6, 6f
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+6: subs x2, x2, x3 // fixup count after alignment
+ b.eq kryo_bb_exit
+ cmp x2, #128
+ blo kryo_bb_copy_64_a
+kryo_bb_source_aligned:
+ lsr x12, x2, #7
+ cmp x12, #PLDTHRESH
+ bls kryo_bb_copy_128_loop_nopld
+
+ cmp x12, #BBTHRESH
+ bls kryo_bb_prime_pump
+
+ add x14, x0, #0x400
+ add x9, x1, #(PLDOFFS*PLDSIZE)
+ sub x14, x14, x9
+ lsl x14, x14, #(21+32)
+ lsr x14, x14, #(21+32)
+ add x14, x14, #(PLDOFFS*PLDSIZE)
+ cmp x12, x14, lsr #7
+ bls kryo_bb_prime_pump
+
+ mov x9, #(PLDOFFS)
+ lsr x13, x14, #7
+ subs x9, x13, x9
+ bls kryo_bb_prime_pump
+
+ add x10, x1, x14
+ bic x10, x10, #0x7F // Round to multiple of PLDSIZE
+
+ sub x12, x12, x14, lsr #7
+ cmp x9, x12
+ sub x13, x12, x9
+ csel x12, x13, x12, LS
+ csel x9, x12, x9, HI
+ csel x12, xzr, x12, HI
+
+ prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE)]
+ prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE+64)]
+kryo_bb_copy_128_loop_outer_doublepld:
+ prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)]
+ prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)+64]
+ subs x9, x9, #1
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ prfm PLDL1KEEP, [x10]
+ prfm PLDL1KEEP, [x10, #64]
+ add x10, x10, #128
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_outer_doublepld
+ cmp x12, #0
+ beq kryo_bb_pop_before_nopld
+ cmp x12, #(448*1024/128)
+ bls kryo_bb_copy_128_loop_outer
+
+kryo_bb_copy_128_loop_ddr:
+ subs x12, x12, #1
+	ldr	x3, [x10], #128		// value unused: the load itself prefetches the line at x10
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_ddr
+ b kryo_bb_pop_before_nopld
+
+kryo_bb_prime_pump:
+ mov x14, #(PLDOFFS*PLDSIZE)
+ add x10, x1, #(PLDOFFS*PLDSIZE)
+ bic x10, x10, #0x7F
+ sub x12, x12, #PLDOFFS
+ prfm PLDL1KEEP, [x10, #(-1*PLDSIZE)]
+ prfm PLDL1KEEP, [x10, #(-1*PLDSIZE+64)]
+ cmp x12, #(448*1024/128)
+ bhi kryo_bb_copy_128_loop_ddr
+
+kryo_bb_copy_128_loop_outer:
+ subs x12, x12, #1
+ prfm PLDL1KEEP, [x10]
+ prfm PLDL1KEEP, [x10, #64]
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ add x10, x10, #128
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_outer
+
+kryo_bb_pop_before_nopld:
+ lsr x12, x14, #7
+kryo_bb_copy_128_loop_nopld:
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ subs x12, x12, #1
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_nopld
+ ands x2, x2, #0x7f
+ beq kryo_bb_exit
+
+kryo_bb_copy_64_a:
+ tbz x2, #6, kryo_bb_copy_32_a
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+kryo_bb_copy_32_a:
+ tbz x2, #5, kryo_bb_16
+ ldp q0, q1, [x1], #32
+ stp q0, q1, [x0], #32
+kryo_bb_16:
+ tbz x2, #4, kryo_bb_lt16
+ ldr q7, [x1], #16
+ str q7, [x0], #16
+ ands x2, x2, #0x0f
+ beq kryo_bb_exit
+kryo_bb_lt16:
+ tbz x2, #3, kryo_bb_lt8
+ ldr x3, [x1], #8
+ str x3, [x0], #8
+kryo_bb_lt8:
+ tbz x2, #2, kryo_bb_lt4
+ ldr w3, [x1], #4
+ str w3, [x0], #4
+kryo_bb_lt4:
+ tbz x2, #1, kryo_bb_lt2
+ ldrh w3, [x1], #2
+ strh w3, [x0], #2
+kryo_bb_lt2:
+ tbz x2, #0, kryo_bb_exit
+ ldrb w3, [x1], #1
+ strb w3, [x0], #1
+kryo_bb_exit:
+ mov x0, x11
+ ret
+
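The tbz ladder used for the alignment fixup (and mirrored in the copy tail) is a copy-by-bits dispatch; a C sketch under invented names:

#include <stddef.h>
#include <string.h>

/* Copy exactly fix (< 128) bytes: if bit k of fix is set, copy one
   2^k-byte chunk, smallest first, mirroring the ldrb/ldrh/ldr/ldp
   sequence above. */
static void copy_fixup(unsigned char** dst, const unsigned char** src,
                       size_t fix) {
  for (size_t chunk = 1; chunk < 128; chunk <<= 1) {
    if (fix & chunk) {
      memcpy(*dst, *src, chunk);
      *dst += chunk;
      *src += chunk;
    }
  }
}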
diff --git a/libc/arch-arm64/kryo/bionic/memmove.S b/libc/arch-arm64/kryo/bionic/memmove.S
new file mode 100644
index 0000000..739ce49
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memmove.S
@@ -0,0 +1,329 @@
+/* Copyright (c) 2014, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Linaro nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ * wchar_t is 4 bytes
+ */
+
+#include <private/bionic_asm.h>
+
+/* Parameters and result. */
+#define dstin x0
+#define src x1
+#define count x2
+#define tmp1 x3
+#define tmp1w w3
+#define tmp2 x4
+#define tmp2w w4
+#define tmp3 x5
+#define tmp3w w5
+#define dst x6
+
+#define A_l x7
+#define A_h x8
+#define B_l x9
+#define B_h x10
+#define C_l x11
+#define C_h x12
+#define D_l x13
+#define D_h x14
+
+#if defined(WMEMMOVE)
+ENTRY(wmemmove)
+ lsl count, count, #2
+#else
+ENTRY(memmove)
+#endif
+ cmp dstin, src
+ b.lo .Ldownwards
+ add tmp1, src, count
+ cmp dstin, tmp1
+ b.hs memcpy /* No overlap. */
+
+ /* Upwards move with potential overlap.
+ * Need to move from the tail backwards. SRC and DST point one
+ * byte beyond the remaining data to move. */
+ add dst, dstin, count
+ add src, src, count
+ cmp count, #64
+ b.ge .Lmov_not_short_up
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+.Ltail63up:
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15up
+ sub dst, dst, tmp1
+ sub src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #32]
+ stp A_l, A_h, [dst, #32]
+1:
+ ldp A_l, A_h, [src, #16]
+ stp A_l, A_h, [dst, #16]
+2:
+ ldp A_l, A_h, [src]
+ stp A_l, A_h, [dst]
+.Ltail15up:
+ /* Move up to 15 bytes of data. Does not assume additional data
+ * being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src, #-1]
+ strb tmp1w, [dst, #-1]
+1:
+ ret
+
+.Lmov_not_short_up:
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ ands tmp2, src, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src, #-1]!
+ strb tmp1w, [dst, #-1]!
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le .Ltail63up
+2:
+ subs count, count, #128
+ b.ge .Lmov_body_large_up
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src, #-64]!
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst, #-64]!
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ b.ne .Ltail63up
+ ret
+
+ /* Critical loop. Start at a new Icache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+.Lmov_body_large_up:
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ ldp D_l, D_h, [src, #-64]!
+1:
+ stp A_l, A_h, [dst, #-16]
+ ldp A_l, A_h, [src, #-16]
+ stp B_l, B_h, [dst, #-32]
+ ldp B_l, B_h, [src, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp C_l, C_h, [src, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ ldp D_l, D_h, [src, #-64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #-16]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ tst count, #0x3f
+ b.ne .Ltail63up
+ ret
+
+
+.Ldownwards:
+ /* For a downwards move we can safely use memcpy provided that
+ * DST is more than 16 bytes away from SRC. */
+ sub tmp1, src, #16
+ cmp dstin, tmp1
+ b.ls memcpy /* May overlap, but not critically. */
+
+ mov dst, dstin /* Preserve DSTIN for return value. */
+ cmp count, #64
+ b.ge .Lmov_not_short_down
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+.Ltail63down:
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15down
+ add dst, dst, tmp1
+ add src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-48]
+ stp A_l, A_h, [dst, #-48]
+1:
+ ldp A_l, A_h, [src, #-32]
+ stp A_l, A_h, [dst, #-32]
+2:
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+.Ltail15down:
+ /* Move up to 15 bytes of data. Does not assume additional data
+ being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+1:
+ ret
+
+.Lmov_not_short_down:
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ neg tmp2, src
+ ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src], #1
+ strb tmp1w, [dst], #1
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le .Ltail63down
+2:
+ subs count, count, #128
+ b.ge .Lmov_body_large_down
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst]
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ add src, src, #64
+ add dst, dst, #64
+ b.ne .Ltail63down
+ ret
+
+ /* Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+.Lmov_body_large_down:
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #0]
+ sub dst, dst, #16 /* Pre-bias. */
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
+1:
+ stp A_l, A_h, [dst, #16]
+ ldp A_l, A_h, [src, #16]
+ stp B_l, B_h, [dst, #32]
+ ldp B_l, B_h, [src, #32]
+ stp C_l, C_h, [dst, #48]
+ ldp C_l, C_h, [src, #48]
+ stp D_l, D_h, [dst, #64]!
+ ldp D_l, D_h, [src, #64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #16]
+ stp B_l, B_h, [dst, #32]
+ stp C_l, C_h, [dst, #48]
+ stp D_l, D_h, [dst, #64]
+ add src, src, #16
+ add dst, dst, #64 + 16
+ tst count, #0x3f
+ b.ne .Ltail63down
+ ret
+#if defined(WMEMMOVE)
+END(wmemmove)
+#else
+END(memmove)
+#endif
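The entry decision above (ascending copy, plain memcpy, or descending copy) corresponds to this C skeleton. Illustrative only; the real code additionally sends a mildly overlapping case to memcpy when dst is at least 16 bytes below src.

#include <stddef.h>
#include <string.h>

static void* memmove_sketch(void* dst, const void* src, size_t n) {
  unsigned char* d = (unsigned char*)dst;
  const unsigned char* s = (const unsigned char*)src;
  if (d < s) {
    for (size_t i = 0; i < n; i++)   /* dst below src: ascend */
      d[i] = s[i];
  } else if (d >= s + n) {
    memcpy(d, s, n);                 /* disjoint: plain memcpy */
  } else {
    while (n > 0) {                  /* dst inside [src, src+n): descend */
      n--;
      d[n] = s[n];
    }
  }
  return dst;
}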
diff --git a/libc/arch-arm64/kryo/kryo.mk b/libc/arch-arm64/kryo/kryo.mk
new file mode 100644
index 0000000..f85638d
--- /dev/null
+++ b/libc/arch-arm64/kryo/kryo.mk
@@ -0,0 +1,15 @@
+libc_bionic_src_files_arm64 += \
+ arch-arm64/generic/bionic/memchr.S \
+ arch-arm64/generic/bionic/memcmp.S \
+ arch-arm64/kryo/bionic/memcpy.S \
+ arch-arm64/kryo/bionic/memmove.S \
+ arch-arm64/generic/bionic/memset.S \
+ arch-arm64/generic/bionic/stpcpy.S \
+ arch-arm64/generic/bionic/strchr.S \
+ arch-arm64/generic/bionic/strcmp.S \
+ arch-arm64/generic/bionic/strcpy.S \
+ arch-arm64/generic/bionic/strlen.S \
+ arch-arm64/generic/bionic/strncmp.S \
+ arch-arm64/generic/bionic/strnlen.S \
+ arch-arm64/generic/bionic/strrchr.S \
+ arch-arm64/generic/bionic/wmemmove.S
diff --git a/libc/arch-x86_64/string/sse2-memmove-slm.S b/libc/arch-x86_64/string/sse2-memmove-slm.S
index 0dbffad..6a5afd6 100644
--- a/libc/arch-x86_64/string/sse2-memmove-slm.S
+++ b/libc/arch-x86_64/string/sse2-memmove-slm.S
@@ -91,9 +91,6 @@ name: \
.section .text.sse2,"ax",@progbits
ENTRY (MEMMOVE)
ENTRANCE
-#ifdef USE_AS_BCOPY
- xchg %rsi, %rdi
-#endif
mov %rdi, %rax
/* Check whether we should copy backward or forward. */
diff --git a/libc/bionic/legacy_32_bit_support.cpp b/libc/bionic/legacy_32_bit_support.cpp
index a107664..1336b43 100644
--- a/libc/bionic/legacy_32_bit_support.cpp
+++ b/libc/bionic/legacy_32_bit_support.cpp
@@ -91,3 +91,22 @@ int getrlimit64(int resource, rlimit64* limits64) {
int setrlimit64(int resource, const rlimit64* limits64) {
return prlimit64(0, resource, limits64, NULL);
}
+
+// There is no 32-bit prlimit system call, so implement it in terms of prlimit64.
+int prlimit(pid_t pid, int resource, const rlimit* n32, rlimit* o32) {
+ rlimit64 n64;
+ if (n32 != nullptr) {
+ n64.rlim_cur = (n32->rlim_cur == RLIM_INFINITY) ? RLIM64_INFINITY : n32->rlim_cur;
+ n64.rlim_max = (n32->rlim_max == RLIM_INFINITY) ? RLIM64_INFINITY : n32->rlim_max;
+ }
+
+ rlimit64 o64;
+ int result = prlimit64(pid, resource,
+ (n32 != nullptr) ? &n64 : nullptr,
+ (o32 != nullptr) ? &o64 : nullptr);
+ if (result != -1 && o32 != nullptr) {
+ o32->rlim_cur = (o64.rlim_cur == RLIM64_INFINITY) ? RLIM_INFINITY : o64.rlim_cur;
+ o32->rlim_max = (o64.rlim_max == RLIM64_INFINITY) ? RLIM_INFINITY : o64.rlim_max;
+ }
+ return result;
+}
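A small caller sketch for the new wrapper (the helper and values are hypothetical; pid 0 means the calling process):

#include <sys/resource.h>

/* Read our current RLIMIT_NOFILE, then raise the soft limit as far
   as the hard limit allows. */
static int raise_soft_nofile(rlim_t wanted) {
  struct rlimit rl;
  if (prlimit(0, RLIMIT_NOFILE, NULL, &rl) == -1) return -1;
  rl.rlim_cur = (wanted < rl.rlim_max) ? wanted : rl.rlim_max;
  return prlimit(0, RLIMIT_NOFILE, &rl, NULL);
}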
diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp
index bd71628..ecde8d4 100644
--- a/libc/bionic/libc_init_common.cpp
+++ b/libc/bionic/libc_init_common.cpp
@@ -250,6 +250,7 @@ static bool __is_unsafe_environment_variable(const char* name) {
"LD_ORIGIN_PATH",
"LD_PRELOAD",
"LD_PROFILE",
+ "LD_SHIM_LIBS",
"LD_SHOW_AUXV",
"LD_USE_LOAD_BIAS",
"LOCALDOMAIN",
diff --git a/libc/bionic/malloc_debug_check.cpp b/libc/bionic/malloc_debug_check.cpp
index dee03fa..ad0e613 100644
--- a/libc/bionic/malloc_debug_check.cpp
+++ b/libc/bionic/malloc_debug_check.cpp
@@ -45,6 +45,7 @@
#include <time.h>
#include <unistd.h>
#include <unwind.h>
+#include <signal.h>
#include "debug_mapinfo.h"
#include "debug_stacktrace.h"
@@ -55,6 +56,14 @@
#include "private/libc_logging.h"
#include "private/ScopedPthreadMutexLocker.h"
+static unsigned int malloc_sig_enabled = 0;
+static unsigned int min_allocation_report_limit;
+static unsigned int max_allocation_limit;
+static const char* process_name;
+static size_t total_count = 0;
+static bool isDumped = false;
+static bool sigHandled = false;
+
#define MAX_BACKTRACE_DEPTH 16
#define ALLOCATION_TAG 0x1ee7d00d
#define BACKLOG_TAG 0xbabecafe
@@ -63,6 +72,11 @@
#define FRONT_GUARD_LEN (1<<5)
#define REAR_GUARD 0xbb
#define REAR_GUARD_LEN (1<<5)
+#define FRONT_GUARD_SS 0xab
+#define DEBUG_SIGNAL SIGWINCH
+
+static void malloc_sigaction(int signum, siginfo_t * sg, void * cxt);
+static struct sigaction default_sa;
static void log_message(const char* format, ...) {
va_list args;
@@ -135,9 +149,14 @@ static inline void init_front_guard(hdr_t* hdr) {
memset(hdr->front_guard, FRONT_GUARD, FRONT_GUARD_LEN);
}
+static inline void set_snapshot(hdr_t* hdr) {
+ memset(hdr->front_guard, FRONT_GUARD_SS, FRONT_GUARD_LEN);
+}
+
static inline bool is_front_guard_valid(hdr_t* hdr) {
for (size_t i = 0; i < FRONT_GUARD_LEN; i++) {
- if (hdr->front_guard[i] != FRONT_GUARD) {
+ if (!((hdr->front_guard[i] == FRONT_GUARD) ||
+ (hdr->front_guard[i] == FRONT_GUARD_SS))) {
return false;
}
}
@@ -171,6 +190,9 @@ static inline bool is_rear_guard_valid(hdr_t* hdr) {
}
static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
+ if (hdr->tag == ALLOCATION_TAG) {
+ total_count += hdr->size;
+ }
hdr->prev = NULL;
hdr->next = *head;
if (*head)
@@ -181,6 +203,9 @@ static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
}
static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
+ if (hdr->tag == ALLOCATION_TAG) {
+ total_count -= hdr->size;
+ }
if (hdr->prev) {
hdr->prev->next = hdr->next;
} else {
@@ -194,6 +219,25 @@ static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
return 0;
}
+static void snapshot_report_leaked_nodes() {
+ log_message("%s: %s\n", __FILE__, __FUNCTION__);
+ hdr_t * iterator = head;
+ size_t total_size = 0;
+ do {
+ if (iterator->front_guard[0] == FRONT_GUARD &&
+ iterator->size >= min_allocation_report_limit) {
+      log_message("obj %p, size %zu", iterator, iterator->size);
+ total_size += iterator->size;
+ log_backtrace(iterator->bt, iterator->bt_depth);
+ log_message("------------------------------"); // as an end marker
+      // Mark the node so it is not printed again.
+ set_snapshot(iterator);
+ }
+ iterator = iterator->next;
+ } while (iterator);
+  log_message("Total pending allocations after last snapshot: %zu", total_size);
+}
+
static inline void add(hdr_t* hdr, size_t size) {
ScopedPthreadMutexLocker locker(&lock);
hdr->tag = ALLOCATION_TAG;
@@ -202,6 +246,11 @@ static inline void add(hdr_t* hdr, size_t size) {
init_rear_guard(hdr);
++g_allocated_block_count;
add_locked(hdr, &tail, &head);
+ if ((total_count >= max_allocation_limit) && !isDumped && malloc_sig_enabled) {
+ isDumped = true;
+ sigHandled = true; // Need to bypass the snapshot
+ kill(getpid(), DEBUG_SIGNAL);
+ }
}
static inline int del(hdr_t* hdr) {
@@ -233,7 +282,8 @@ static bool was_used_after_free(hdr_t* hdr) {
static inline int check_guards(hdr_t* hdr, int* safe) {
*safe = 1;
if (!is_front_guard_valid(hdr)) {
- if (hdr->front_guard[0] == FRONT_GUARD) {
+ if ((hdr->front_guard[0] == FRONT_GUARD) ||
+ ((hdr->front_guard[0] == FRONT_GUARD_SS))) {
log_message("+++ ALLOCATION %p SIZE %d HAS A CORRUPTED FRONT GUARD\n",
user(hdr), hdr->size);
} else {
@@ -656,6 +706,42 @@ extern "C" bool malloc_debug_initialize(HashTable* hash_table, const MallocDebug
__libc_format_log(ANDROID_LOG_INFO, "libc", "not gathering backtrace information\n");
}
+ if (__system_property_get("libc.debug.malloc", env)) {
+    if (atoi(env) == 40) malloc_sig_enabled = 1;
+ }
+
+ if (malloc_sig_enabled) {
+ char debug_proc_size[PROP_VALUE_MAX];
+ if (__system_property_get("libc.debug.malloc.maxprocsize", debug_proc_size))
+ max_allocation_limit = atoi(debug_proc_size);
+ else
+ max_allocation_limit = 30 * 1024 * 1024; // In Bytes [Default is 30 MB]
+ if (__system_property_get("libc.debug.malloc.minalloclim", debug_proc_size))
+ min_allocation_report_limit = atoi(debug_proc_size);
+ else
+ min_allocation_report_limit = 10 * 1024; // In Bytes [Default is 10 KB]
+ process_name = getprogname();
+ }
+
+ if (malloc_sig_enabled) {
+    struct sigaction sa;
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = malloc_sigaction;
+ sigemptyset(&sa.sa_mask);
+ sigaddset(&sa.sa_mask, DEBUG_SIGNAL);
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_restorer = NULL;
+ if (sigaction(DEBUG_SIGNAL, &sa, &default_sa) < 0) {
+      log_message("Failed to register signal handler: %s", strerror(errno));
+ malloc_sig_enabled = 0;
+ } else {
+ log_message("Registered signal handler");
+ sigHandled = false;
+ }
+ }
if (g_backtrace_enabled) {
backtrace_startup();
}
@@ -668,9 +754,66 @@ extern "C" void malloc_debug_finalize(int malloc_debug_level) {
if (malloc_debug_level == 10) {
ReportMemoryLeaks();
}
+ if (malloc_sig_enabled) {
+ log_message("Deregister %d signal handler", DEBUG_SIGNAL);
+ sigaction(DEBUG_SIGNAL, &default_sa, NULL);
+ malloc_sig_enabled = 0;
+ sigHandled = false;
+ }
if (g_backtrace_enabled) {
backtrace_shutdown();
}
pthread_setspecific(g_debug_calls_disabled, NULL);
}
+
+static void snapshot_nodes_locked() {
+ log_message("%s: %s\n", __FILE__, __FUNCTION__);
+ hdr_t * iterator = head;
+ do {
+ if (iterator->front_guard[0] == FRONT_GUARD) {
+ set_snapshot(iterator);
+ }
+ iterator = iterator->next;
+ } while (iterator);
+}
+
+static void malloc_sigaction(int signum, siginfo_t * info, void * context)
+{
+ log_message("%s: %s\n", __FILE__, __FUNCTION__);
+ log_message("%s got %d signal from PID: %d (context:%x)\n",
+ __func__, signum, info->si_pid, context);
+
+ if (signum != DEBUG_SIGNAL) {
+ log_message("RECEIVED %d instead of %d\n", signum, DEBUG_SIGNAL);
+ return;
+ }
+
+ log_message("Process under observation:%s", process_name);
+  log_message("Maximum process size limit: %u bytes", max_allocation_limit);
+  log_message("Won't print allocations below %u bytes", min_allocation_report_limit);
+  log_message("Total count: %zu\n", total_count);
+
+ if (!head) {
+ log_message("No allocations?");
+ return;
+ }
+  // If sigHandled is false, this is the first time the signal has been handled.
+ if (!sigHandled) {
+ sigHandled = true;
+    // Mark the existing nodes; they are assumed not to be leaks.
+ snapshot_nodes_locked();
+ } else {
+ // We need to print new allocations now
+ log_message("Start dumping allocations of the process %s", process_name);
+ log_message("+++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ ***\n");
+
+ // Print allocations of the process
+ if (g_backtrace_enabled)
+ snapshot_report_leaked_nodes();
+
+ log_message("*** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++\n");
+ log_message("Completed dumping allocations of the process %s", process_name);
+ }
+ return;
+}
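Putting the pieces together: with libc.debug.malloc set to 40, the first DEBUG_SIGNAL (SIGWINCH, per the define above) snapshots the live heap and the second dumps what has been allocated since. A hypothetical trigger from another process:

#include <signal.h>
#include <sys/types.h>

static void request_allocation_report(pid_t target) {
  kill(target, SIGWINCH);  /* first delivery: snapshot live nodes */
  /* ... let the suspected leak accumulate ... */
  kill(target, SIGWINCH);  /* second delivery: report allocations of
                              at least minalloclim bytes made since */
}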
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index ee796c6..12fc6dd 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -396,6 +396,9 @@ static void malloc_init_impl() {
}
so_name = "libc_malloc_debug_qemu.so";
break;
+ case 40:
+ so_name = "libc_malloc_debug_leak.so";
+ break;
default:
error_log("%s: Debug level %d is unknown\n", getprogname(), g_malloc_debug_level);
return;
@@ -456,6 +459,9 @@ static void malloc_init_impl() {
case 20:
InitMalloc(malloc_impl_handle, &malloc_dispatch_table, "qemu_instrumented");
break;
+ case 40:
+ InitMalloc(malloc_impl_handle, &malloc_dispatch_table, "chk");
+ break;
default:
break;
}
diff --git a/libc/bionic/mmap.cpp b/libc/bionic/mmap.cpp
index 8f25a89..53e8b46 100644
--- a/libc/bionic/mmap.cpp
+++ b/libc/bionic/mmap.cpp
@@ -36,6 +36,11 @@
extern "C" void* __mmap2(void*, size_t, int, int, int, size_t);
#define MMAP2_SHIFT 12 // 2**12 == 4096
+#ifdef LEGACY_MMAP
+#define TO_64(a) ((a) & 0x00000000ffffffff)
+#else
+#define TO_64(a) (a)
+#endif
static bool kernel_has_MADV_MERGEABLE = true;
@@ -60,5 +65,5 @@ void* mmap64(void* addr, size_t size, int prot, int flags, int fd, off64_t offse
}
void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) {
- return mmap64(addr, size, prot, flags, fd, static_cast<off64_t>(offset));
+ return mmap64(addr, size, prot, flags, fd, TO_64(static_cast<off64_t>(offset)));
}
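For context: __mmap2 takes its offset in 4096-byte units, so mmap64 must reject unaligned offsets and shift by MMAP2_SHIFT before the syscall; the TO_64 mask then simply truncates the offset to its low 32 bits for legacy kernels that mishandle wider values. The mmap64 body is outside this hunk, so the following helper is an assumption, not the actual implementation:

#include <errno.h>
#include <sys/types.h>

#define MMAP2_SHIFT 12  /* as defined above: 2**12 == 4096 */

/* Hypothetical: convert a byte offset into __mmap2 page units. */
static int offset_to_units(off64_t offset, unsigned long* units) {
  if (offset < 0 || (offset & ((1 << MMAP2_SHIFT) - 1)) != 0) {
    errno = EINVAL;
    return -1;
  }
  *units = (unsigned long)(offset >> MMAP2_SHIFT);
  return 0;
}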
diff --git a/libc/dns/net/getaddrinfo.c b/libc/dns/net/getaddrinfo.c
index 829b679..cc8b8b4 100644
--- a/libc/dns/net/getaddrinfo.c
+++ b/libc/dns/net/getaddrinfo.c
@@ -1791,10 +1791,14 @@ _find_src_addr(const struct sockaddr *addr, struct sockaddr *src_addr, unsigned
return -1;
}
}
- if (mark != MARK_UNSET && setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
+ if (mark != MARK_UNSET && setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0) {
+ close(sock);
return 0;
- if (uid > 0 && uid != NET_CONTEXT_INVALID_UID && fchown(sock, uid, (gid_t)-1) < 0)
+ }
+ if (uid > 0 && uid != NET_CONTEXT_INVALID_UID && fchown(sock, uid, (gid_t)-1) < 0) {
+ close(sock);
return 0;
+ }
do {
ret = __connect(sock, addr, len);
} while (ret == -1 && errno == EINTR);
diff --git a/libc/include/libgen.h b/libc/include/libgen.h
index e89328e..4d22d15 100644
--- a/libc/include/libgen.h
+++ b/libc/include/libgen.h
@@ -32,18 +32,19 @@
#include <sys/cdefs.h>
#include <sys/types.h>
+
__BEGIN_DECLS
-#if !defined(__bionic_using_gnu_basename)
/*
- * <string.h> gets you the GNU basename.
- * <libgen.h> the POSIX one.
- * Note that our "POSIX" one has the wrong argument cv-qualifiers, but doesn't
- * modify its input and uses thread-local storage for the result if necessary.
+ * Including <string.h> will get you the GNU basename, unless <libgen.h> is
+ * included, either before or after including <string.h>.
+ *
+ * Note that this has the wrong argument cv-qualifiers, but doesn't modify its
+ * input and uses thread-local storage for the result if necessary.
*/
-extern char* basename(const char*);
-#define __bionic_using_posix_basename
-#endif
+extern char* __posix_basename(const char*) __RENAME(basename);
+
+#define basename __posix_basename
/* This has the wrong argument cv-qualifiers, but doesn't modify its input and uses thread-local storage for the result if necessary. */
extern char* dirname(const char*);
diff --git a/libc/include/paths.h b/libc/include/paths.h
index 82c2804..7700cdd 100644
--- a/libc/include/paths.h
+++ b/libc/include/paths.h
@@ -33,6 +33,7 @@
#define _PATHS_H_
#define _PATH_BSHELL "/system/bin/sh"
+#define _PATH_BSHELL2 "/sbin/sh"
#define _PATH_CONSOLE "/dev/console"
#define _PATH_DEFPATH "/sbin:/vendor/bin:/system/sbin:/system/bin:/system/xbin"
#define _PATH_DEV "/dev/"
diff --git a/libc/include/regex.h b/libc/include/regex.h
index aec38e3..b06a515 100644
--- a/libc/include/regex.h
+++ b/libc/include/regex.h
@@ -42,8 +42,9 @@
#include <sys/cdefs.h>
#include <sys/types.h>
-/* types */
-typedef off_t regoff_t;
+/* POSIX says regoff_t is at least as large as the larger of ptrdiff_t and
+ * ssize_t. BSD uses off_t, but that interacts badly with _FILE_OFFSET_BITS. */
+typedef ssize_t regoff_t;
typedef struct {
int re_magic;
diff --git a/libc/include/string.h b/libc/include/string.h
index d32c164..8ceccd5 100644
--- a/libc/include/string.h
+++ b/libc/include/string.h
@@ -107,18 +107,18 @@ extern size_t strxfrm(char* __restrict, const char* __restrict, size_t);
extern int strcoll_l(const char *, const char *, locale_t) __purefunc;
extern size_t strxfrm_l(char* __restrict, const char* __restrict, size_t, locale_t);
-#if defined(__USE_GNU) && !defined(__bionic_using_posix_basename)
+#if defined(__USE_GNU) && !defined(basename)
/*
* glibc has a basename in <string.h> that's different to the POSIX one in <libgen.h>.
* It doesn't modify its argument, and in C++ it's const-correct.
*/
+
#if defined(__cplusplus)
extern "C++" char* basename(char*) __RENAME(__gnu_basename) __nonnull((1));
extern "C++" const char* basename(const char*) __RENAME(__gnu_basename) __nonnull((1));
#else
extern char* basename(const char*) __RENAME(__gnu_basename) __nonnull((1));
#endif
-#define __bionic_using_gnu_basename
#endif
extern void* __memchr_chk(const void*, int, size_t, size_t);
diff --git a/libc/include/sys/resource.h b/libc/include/sys/resource.h
index 3f8dd45..8209dfb 100644
--- a/libc/include/sys/resource.h
+++ b/libc/include/sys/resource.h
@@ -53,10 +53,7 @@ extern int setpriority(int, int, int);
extern int getrusage(int, struct rusage*);
-#if __LP64__
-/* Implementing prlimit for 32-bit isn't worth the effort. */
extern int prlimit(pid_t, int, const struct rlimit*, struct rlimit*);
-#endif
extern int prlimit64(pid_t, int, const struct rlimit64*, struct rlimit64*);
__END_DECLS
diff --git a/libc/kernel/uapi/linux/android_alarm.h b/libc/kernel/uapi/linux/android_alarm.h
index 801a01e..9f2de28 100644
--- a/libc/kernel/uapi/linux/android_alarm.h
+++ b/libc/kernel/uapi/linux/android_alarm.h
@@ -28,28 +28,31 @@ enum android_alarm_type {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
ANDROID_ALARM_ELAPSED_REALTIME,
ANDROID_ALARM_SYSTEMTIME,
+ ANDROID_ALARM_RTC_POWEROFF_WAKEUP,
ANDROID_ALARM_TYPE_COUNT,
-};
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+};
enum android_alarm_return_flags {
ANDROID_ALARM_RTC_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_WAKEUP,
ANDROID_ALARM_RTC_MASK = 1U << ANDROID_ALARM_RTC,
- ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP,
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP,
ANDROID_ALARM_ELAPSED_REALTIME_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME,
ANDROID_ALARM_SYSTEMTIME_MASK = 1U << ANDROID_ALARM_SYSTEMTIME,
+ ANDROID_ALARM_RTC_POWEROFF_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_POWEROFF_WAKEUP,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
ANDROID_ALARM_TIME_CHANGE_MASK = 1U << 16
};
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ANDROID_ALARM_CLEAR(type) _IO('a', 0 | ((type) << 4))
#define ANDROID_ALARM_WAIT _IO('a', 1)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ALARM_IOW(c,type,size) _IOW('a', (c) | ((type) << 4), size)
#define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
#define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
diff --git a/libc/kernel/uapi/linux/if_packet.h b/libc/kernel/uapi/linux/if_packet.h
index 6e9ae6a..ba283c6 100644
--- a/libc/kernel/uapi/linux/if_packet.h
+++ b/libc/kernel/uapi/linux/if_packet.h
@@ -119,143 +119,145 @@ struct tpacket_auxdata {
#define TP_STATUS_VLAN_VALID (1 << 4)
#define TP_STATUS_BLK_TMO (1 << 5)
#define TP_STATUS_VLAN_TPID_VALID (1 << 6)
-#define TP_STATUS_AVAILABLE 0
+#define TP_STATUS_CSUM_UNNECESSARY (1 << 7)
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define TP_STATUS_AVAILABLE 0
#define TP_STATUS_SEND_REQUEST (1 << 0)
#define TP_STATUS_SENDING (1 << 1)
#define TP_STATUS_WRONG_FORMAT (1 << 2)
-#define TP_STATUS_TS_SOFTWARE (1 << 29)
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define TP_STATUS_TS_SOFTWARE (1 << 29)
#define TP_STATUS_TS_SYS_HARDWARE (1 << 30)
#define TP_STATUS_TS_RAW_HARDWARE (1 << 31)
#define TP_FT_REQ_FILL_RXHASH 0x1
-struct tpacket_hdr {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tpacket_hdr {
unsigned long tp_status;
unsigned int tp_len;
unsigned int tp_snaplen;
- unsigned short tp_mac;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned short tp_mac;
unsigned short tp_net;
unsigned int tp_sec;
unsigned int tp_usec;
-};
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+};
#define TPACKET_ALIGNMENT 16
#define TPACKET_ALIGN(x) (((x) + TPACKET_ALIGNMENT - 1) & ~(TPACKET_ALIGNMENT - 1))
#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
-struct tpacket2_hdr {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tpacket2_hdr {
__u32 tp_status;
__u32 tp_len;
__u32 tp_snaplen;
- __u16 tp_mac;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u16 tp_mac;
__u16 tp_net;
__u32 tp_sec;
__u32 tp_nsec;
- __u16 tp_vlan_tci;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u16 tp_vlan_tci;
__u16 tp_vlan_tpid;
__u8 tp_padding[4];
};
-struct tpacket_hdr_variant1 {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tpacket_hdr_variant1 {
__u32 tp_rxhash;
__u32 tp_vlan_tci;
__u16 tp_vlan_tpid;
- __u16 tp_padding;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u16 tp_padding;
};
struct tpacket3_hdr {
__u32 tp_next_offset;
- __u32 tp_sec;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u32 tp_sec;
__u32 tp_nsec;
__u32 tp_snaplen;
__u32 tp_len;
- __u32 tp_status;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u32 tp_status;
__u16 tp_mac;
__u16 tp_net;
union {
- struct tpacket_hdr_variant1 hv1;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ struct tpacket_hdr_variant1 hv1;
};
__u8 tp_padding[8];
};
-struct tpacket_bd_ts {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tpacket_bd_ts {
unsigned int ts_sec;
union {
unsigned int ts_usec;
- unsigned int ts_nsec;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned int ts_nsec;
};
};
struct tpacket_hdr_v1 {
- __u32 block_status;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u32 block_status;
__u32 num_pkts;
__u32 offset_to_first_pkt;
__u32 blk_len;
- __aligned_u64 seq_num;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __aligned_u64 seq_num;
struct tpacket_bd_ts ts_first_pkt, ts_last_pkt;
};
union tpacket_bd_header_u {
- struct tpacket_hdr_v1 bh1;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ struct tpacket_hdr_v1 bh1;
};
struct tpacket_block_desc {
__u32 version;
- __u32 offset_to_priv;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __u32 offset_to_priv;
union tpacket_bd_header_u hdr;
};
#define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
-#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
enum tpacket_versions {
TPACKET_V1,
TPACKET_V2,
- TPACKET_V3
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ TPACKET_V3
};
struct tpacket_req {
unsigned int tp_block_size;
- unsigned int tp_block_nr;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned int tp_block_nr;
unsigned int tp_frame_size;
unsigned int tp_frame_nr;
};
-struct tpacket_req3 {
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tpacket_req3 {
unsigned int tp_block_size;
unsigned int tp_block_nr;
unsigned int tp_frame_size;
- unsigned int tp_frame_nr;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned int tp_frame_nr;
unsigned int tp_retire_blk_tov;
unsigned int tp_sizeof_priv;
unsigned int tp_feature_req_word;
-};
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+};
union tpacket_req_u {
struct tpacket_req req;
struct tpacket_req3 req3;
-};
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+};
struct packet_mreq {
int mr_ifindex;
unsigned short mr_type;
- unsigned short mr_alen;
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned short mr_alen;
unsigned char mr_address[8];
};
#define PACKET_MR_MULTICAST 0
-#define PACKET_MR_PROMISC 1
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define PACKET_MR_PROMISC 1
#define PACKET_MR_ALLMULTI 2
#define PACKET_MR_UNICAST 3
#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
diff --git a/libc/kernel/uapi/linux/time.h b/libc/kernel/uapi/linux/time.h
index bf245fc..5690d27 100644
--- a/libc/kernel/uapi/linux/time.h
+++ b/libc/kernel/uapi/linux/time.h
@@ -67,9 +67,10 @@ struct itimerval {
#define CLOCK_SGI_CYCLE 10
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
#define CLOCK_TAI 11
+#define CLOCK_POWEROFF_ALARM 12
#define MAX_CLOCKS 16
#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
-#define CLOCKS_MONO CLOCK_MONOTONIC
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CLOCKS_MONO CLOCK_MONOTONIC
#define TIMER_ABSTIME 0x01
#endif
diff --git a/libc/upstream-netbsd/lib/libc/gen/popen.c b/libc/upstream-netbsd/lib/libc/gen/popen.c
index 593e346..b6ce47c 100644
--- a/libc/upstream-netbsd/lib/libc/gen/popen.c
+++ b/libc/upstream-netbsd/lib/libc/gen/popen.c
@@ -152,6 +152,8 @@ popen(const char *command, const char *type)
}
execl(_PATH_BSHELL, "sh", "-c", command, NULL);
+ if (errno == ENOENT)
+ execl(_PATH_BSHELL2, "sh", "-c", command, NULL);
_exit(127);
/* NOTREACHED */
}
diff --git a/libm/Android.mk b/libm/Android.mk
index e919129..f053e25 100644
--- a/libm/Android.mk
+++ b/libm/Android.mk
@@ -107,8 +107,6 @@ LOCAL_SRC_FILES := \
upstream-freebsd/lib/msun/src/s_exp2.c \
upstream-freebsd/lib/msun/src/s_exp2f.c \
upstream-freebsd/lib/msun/src/s_expm1f.c \
- upstream-freebsd/lib/msun/src/s_fabs.c \
- upstream-freebsd/lib/msun/src/s_fabsf.c \
upstream-freebsd/lib/msun/src/s_fdim.c \
upstream-freebsd/lib/msun/src/s_finite.c \
upstream-freebsd/lib/msun/src/s_finitef.c \
@@ -174,7 +172,6 @@ LOCAL_SRC_FILES_64 := \
upstream-freebsd/lib/msun/src/s_copysignl.c \
upstream-freebsd/lib/msun/src/e_coshl.c \
upstream-freebsd/lib/msun/src/s_cosl.c \
- upstream-freebsd/lib/msun/src/s_fabsl.c \
upstream-freebsd/lib/msun/src/s_floorl.c \
upstream-freebsd/lib/msun/src/s_fmal.c \
upstream-freebsd/lib/msun/src/s_fmaxl.c \
@@ -227,6 +224,10 @@ LOCAL_SRC_FILES += \
LOCAL_SRC_FILES += \
signbit.c \
+# Home-grown stuff.
+LOCAL_SRC_FILES += \
+ fabs.cpp \
+
# Arch specific optimizations.
# -----------------------------------------------------------------------------
@@ -282,9 +283,8 @@ LOCAL_SRC_FILES_arm += \
else
LOCAL_SRC_FILES_arm += \
- arm/e_sqrt.S \
- arm/e_sqrtf.S \
- arm/s_floor.S \
+ arm/sqrt.S \
+ arm/floor.S \
endif
@@ -481,8 +481,10 @@ LOCAL_C_INCLUDES_64 += $(LOCAL_PATH)/upstream-freebsd/lib/msun/ld128/
LOCAL_CLANG := $(libm_clang)
LOCAL_ARM_MODE := arm
LOCAL_CFLAGS := \
+ -D__BIONIC_NO_MATH_INLINES \
-DFLT_EVAL_METHOD=0 \
-include $(LOCAL_PATH)/freebsd-compat.h \
+ -Werror \
-Wno-missing-braces \
-Wno-parentheses \
-Wno-sign-compare \
diff --git a/libm/arm/e_sqrtf.S b/libm/arm/e_sqrtf.S
deleted file mode 100644
index ddefb22..0000000
--- a/libm/arm/e_sqrtf.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
- * Johhnny Qiu <joqiu@nvidia.com>
- * Shu Zhang <chazhang@nvidia.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- * * Neither the name of The Linux Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
- * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <private/bionic_asm.h>
-
-ENTRY(sqrtf)
- vmov.f32 s0, r0
- vsqrt.f32 s0, s0
- vmov.f32 r0, s0
- bx lr
-END(sqrtf)
diff --git a/libm/arm/s_floor.S b/libm/arm/floor.S
index 3af8f76..3af8f76 100644
--- a/libm/arm/s_floor.S
+++ b/libm/arm/floor.S
diff --git a/libm/arm/e_sqrt.S b/libm/arm/sqrt.S
index 17312f5..f2981f4 100644
--- a/libm/arm/e_sqrt.S
+++ b/libm/arm/sqrt.S
@@ -39,4 +39,11 @@ ENTRY(sqrt)
bx lr
END(sqrt)
+ENTRY(sqrtf)
+ vmov.f32 s0, r0
+ vsqrt.f32 s0, s0
+ vmov.f32 r0, s0
+ bx lr
+END(sqrtf)
+
ALIAS_SYMBOL(sqrtl, sqrt);
diff --git a/libm/arm64/fenv.c b/libm/arm64/fenv.c
index ce560a7..19a2393 100644
--- a/libm/arm64/fenv.c
+++ b/libm/arm64/fenv.c
@@ -26,6 +26,7 @@
* $FreeBSD: libm/aarch64/fenv.c $
*/
+#include <stdint.h>
#include <fenv.h>
#define FPCR_EXCEPT_SHIFT 8
@@ -38,10 +39,20 @@ const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */};
typedef __uint32_t fpu_control_t; // FPCR, Floating-point Control Register.
typedef __uint32_t fpu_status_t; // FPSR, Floating-point Status Register.
-#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
-#define __get_fpsr(__fpsr) __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr))
-#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
-#define __set_fpsr(__fpsr) __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr))
+#define __get(REGISTER, __value) { \
+ uint64_t __value64; \
+ __asm__ __volatile__("mrs %0," REGISTER : "=r" (__value64)); \
+ __value = (__uint32_t) __value64; \
+}
+#define __get_fpcr(__fpcr) __get("fpcr", __fpcr)
+#define __get_fpsr(__fpsr) __get("fpsr", __fpsr)
+
+#define __set(REGISTER, __value) { \
+ uint64_t __value64 = __value; \
+ __asm__ __volatile__("msr " REGISTER ",%0" : : "ri" (__value64)); \
+}
+#define __set_fpcr(__fpcr) __set("fpcr", __fpcr)
+#define __set_fpsr(__fpsr) __set("fpsr", __fpsr)
int fegetenv(fenv_t* envp) {
__get_fpcr(envp->__control);
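The accessor rewrite reflects an AArch64 ISA constraint: mrs and msr transfer data through 64-bit general-purpose (Xn) registers only, so binding a 32-bit lvalue directly to an "r" operand lets the compiler pick a Wn register that the assembler then rejects. Staging the value through a uint64_t keeps the operand 64-bit and truncates afterwards, which is also why <stdint.h> is now included. The same pattern as a self-contained sketch (helper names illustrative):

    #include <stdint.h>

    /* AArch64 only: the mrs/msr transfer register must be an X register,
     * so stage the 32-bit control word through a 64-bit temporary. */
    static inline uint32_t read_fpcr(void) {
      uint64_t value;
      __asm__ __volatile__("mrs %0, fpcr" : "=r"(value));
      return (uint32_t) value;
    }

    static inline void write_fpcr(uint32_t fpcr) {
      uint64_t value = fpcr;
      __asm__ __volatile__("msr fpcr, %0" : : "r"(value));
    }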
diff --git a/libm/fabs.cpp b/libm/fabs.cpp
new file mode 100644
index 0000000..add73fe
--- /dev/null
+++ b/libm/fabs.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+
+#include "fpmath.h"
+
+double fabs(double x) {
+#if __arm__
+ // Both Clang and GCC insist on moving r0/r1 into a double register
+ // and using fabs where bit-twiddling would be a better choice.
+ // They get fabsf right, but we need to be careful in fabsl too.
+ IEEEd2bits u;
+ u.d = x;
+ u.bits.sign = 0;
+ return u.d;
+#else
+ return __builtin_fabs(x);
+#endif
+}
+
+float fabsf(float x) {
+ return __builtin_fabsf(x);
+}
+
+#if defined(__LP64__)
+long double fabsl(long double x) { return __builtin_fabsl(x); }
+#else
+long double fabsl(long double x) {
+ // Don't use __builtin_fabs here because of ARM. (See fabs above.)
+ return fabs(x);
+}
+#endif
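The rationale for the __arm__ special case: on 32-bit ARM a double argument arrives in the core registers r0/r1, so clearing the sign bit through the IEEEd2bits union (from fpmath.h) avoids the round trip into a VFP register that the compilers would otherwise emit for a fabs instruction. The bit-clear also behaves uniformly on special operands; a quick standalone check of the properties the implementation relies on:

    #include <assert.h>
    #include <math.h>

    int main(void) {
      assert(fabs(-3.5) == 3.5);
      assert(!signbit(fabs(-0.0)));   /* -0.0 -> +0.0: sign bit cleared */
      assert(isnan(fabs(NAN)));       /* NaN stays NaN, sign cleared */
      assert(isinf(fabs(-INFINITY)) && fabs(-INFINITY) > 0);
      return 0;
    }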
diff --git a/libm/fake_long_double.c b/libm/fake_long_double.c
index 317a115..5edf839 100644
--- a/libm/fake_long_double.c
+++ b/libm/fake_long_double.c
@@ -25,7 +25,6 @@
*/
long double copysignl(long double a1, long double a2) { return copysign(a1, a2); }
-long double fabsl(long double a1) { return fabs(a1); }
long double fmaxl(long double a1, long double a2) { return fmax(a1, a2); }
long double fmodl(long double a1, long double a2) { return fmod(a1, a2); }
long double fminl(long double a1, long double a2) { return fmin(a1, a2); }
diff --git a/libm/include/math.h b/libm/include/math.h
index 1542374..ce8e3b2 100644
--- a/libm/include/math.h
+++ b/libm/include/math.h
@@ -15,116 +15,70 @@
*/
#ifndef _MATH_H_
-#define _MATH_H_
+#define _MATH_H_
#include <sys/cdefs.h>
#include <limits.h>
+#if !defined(__BIONIC_NO_MATH_INLINES)
+#define __BIONIC_MATH_INLINE(__def) extern __inline__ __always_inline __attribute__((gnu_inline)) __attribute__((__artificial__)) __def
+#else
+#define __BIONIC_MATH_INLINE(__def)
+#endif
+
__BEGIN_DECLS
#pragma GCC visibility push(default)
-/*
- * ANSI/POSIX
- */
-extern const union __infinity_un {
- unsigned char __uc[8];
- double __ud;
-} __infinity;
-
-extern const union __nan_un {
- unsigned char __uc[sizeof(float)];
- float __uf;
-} __nan;
-
-#if __GNUC_PREREQ(3, 3) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 800)
-#define __MATH_BUILTIN_CONSTANTS
-#endif
+#define HUGE_VAL __builtin_huge_val()
-#if __GNUC_PREREQ(3, 0) && !defined(__INTEL_COMPILER)
-#define __MATH_BUILTIN_RELOPS
-#endif
+#if __ISO_C_VISIBLE >= 1999
+#define FP_ILOGB0 (-INT_MAX)
+#define FP_ILOGBNAN INT_MAX
-#ifdef __MATH_BUILTIN_CONSTANTS
-#define HUGE_VAL __builtin_huge_val()
-#else
-#define HUGE_VAL (__infinity.__ud)
-#endif
+#define HUGE_VALF __builtin_huge_valf()
+#define HUGE_VALL __builtin_huge_vall()
+#define INFINITY __builtin_inff()
+#define NAN __builtin_nanf("")
-#if __ISO_C_VISIBLE >= 1999
-#define FP_ILOGB0 (-INT_MAX) /* Android-changed */
-#define FP_ILOGBNAN INT_MAX /* Android-changed */
-
-#ifdef __MATH_BUILTIN_CONSTANTS
-#define HUGE_VALF __builtin_huge_valf()
-#define HUGE_VALL __builtin_huge_vall()
-#define INFINITY __builtin_inff()
-#define NAN __builtin_nanf("")
-#else
-#define HUGE_VALF (float)HUGE_VAL
-#define HUGE_VALL (long double)HUGE_VAL
-#define INFINITY HUGE_VALF
-#define NAN (__nan.__uf)
-#endif /* __MATH_BUILTIN_CONSTANTS */
-
-#define MATH_ERRNO 1
-#define MATH_ERREXCEPT 2
-#define math_errhandling MATH_ERREXCEPT
-
-#define FP_FAST_FMAF 1
-#ifdef __ia64__
-#define FP_FAST_FMA 1
-#define FP_FAST_FMAL 1
+#define MATH_ERRNO 1
+#define MATH_ERREXCEPT 2
+#define math_errhandling MATH_ERREXCEPT
+
+#if defined(__FP_FAST_FMA)
+#define FP_FAST_FMA 1
+#endif
+#if defined(__FP_FAST_FMAF)
+#define FP_FAST_FMAF 1
+#endif
+#if defined(__FP_FAST_FMAL)
+#define FP_FAST_FMAL 1
#endif
/* Symbolic constants to classify floating point numbers. */
-#define FP_INFINITE 0x01
-#define FP_NAN 0x02
-#define FP_NORMAL 0x04
-#define FP_SUBNORMAL 0x08
-#define FP_ZERO 0x10
-#define fpclassify(x) \
- ((sizeof (x) == sizeof (float)) ? __fpclassifyf(x) \
- : (sizeof (x) == sizeof (double)) ? __fpclassifyd(x) \
- : __fpclassifyl(x))
-
-#define isfinite(x) \
- ((sizeof (x) == sizeof (float)) ? __isfinitef(x) \
- : (sizeof (x) == sizeof (double)) ? __isfinite(x) \
- : __isfinitel(x))
-#define isinf(x) \
- ((sizeof (x) == sizeof (float)) ? __isinff(x) \
- : (sizeof (x) == sizeof (double)) ? isinf(x) \
- : __isinfl(x))
-#define isnan(x) \
- ((sizeof (x) == sizeof (float)) ? __isnanf(x) \
- : (sizeof (x) == sizeof (double)) ? isnan(x) \
- : __isnanl(x))
-#define isnormal(x) \
- ((sizeof (x) == sizeof (float)) ? __isnormalf(x) \
- : (sizeof (x) == sizeof (double)) ? __isnormal(x) \
- : __isnormall(x))
-
-#ifdef __MATH_BUILTIN_RELOPS
-#define isgreater(x, y) __builtin_isgreater((x), (y))
-#define isgreaterequal(x, y) __builtin_isgreaterequal((x), (y))
-#define isless(x, y) __builtin_isless((x), (y))
-#define islessequal(x, y) __builtin_islessequal((x), (y))
-#define islessgreater(x, y) __builtin_islessgreater((x), (y))
-#define isunordered(x, y) __builtin_isunordered((x), (y))
-#else
-#define isgreater(x, y) (!isunordered((x), (y)) && (x) > (y))
-#define isgreaterequal(x, y) (!isunordered((x), (y)) && (x) >= (y))
-#define isless(x, y) (!isunordered((x), (y)) && (x) < (y))
-#define islessequal(x, y) (!isunordered((x), (y)) && (x) <= (y))
-#define islessgreater(x, y) (!isunordered((x), (y)) && \
- ((x) > (y) || (y) > (x)))
-#define isunordered(x, y) (isnan(x) || isnan(y))
-#endif /* __MATH_BUILTIN_RELOPS */
-
-#define signbit(x) \
- ((sizeof (x) == sizeof (float)) ? __signbitf(x) \
- : (sizeof (x) == sizeof (double)) ? __signbit(x) \
- : __signbitl(x))
+#define FP_INFINITE 0x01
+#define FP_NAN 0x02
+#define FP_NORMAL 0x04
+#define FP_SUBNORMAL 0x08
+#define FP_ZERO 0x10
+#define fpclassify(x) \
+ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
+
+#define isfinite(x) __builtin_isfinite(x)
+#define isinf(x) __builtin_isinf(x)
+#define isnan(x) __builtin_isnan(x)
+#define isnormal(x) __builtin_isnormal(x)
+
+#define isgreater(x, y) __builtin_isgreater((x), (y))
+#define isgreaterequal(x, y) __builtin_isgreaterequal((x), (y))
+#define isless(x, y) __builtin_isless((x), (y))
+#define islessequal(x, y) __builtin_islessequal((x), (y))
+#define islessgreater(x, y) __builtin_islessgreater((x), (y))
+#define isunordered(x, y) __builtin_isunordered((x), (y))
+
+#define signbit(x) \
+ ((sizeof(x) == sizeof(float)) ? __builtin_signbitf(x) \
+ : (sizeof(x) == sizeof(double)) ? __builtin_signbit(x) \
+ : __builtin_signbitl(x))
typedef double __double_t;
typedef __double_t double_t;
@@ -213,6 +167,7 @@ double sqrt(double);
double ceil(double);
double fabs(double) __pure2;
+__BIONIC_MATH_INLINE(double fabs(double x) { return __builtin_fabs(x); })
double floor(double);
double fmod(double, double);
@@ -331,6 +286,7 @@ float sqrtf(float);
float ceilf(float);
float fabsf(float) __pure2;
+__BIONIC_MATH_INLINE(float fabsf(float x) { return __builtin_fabsf(x); })
float floorf(float);
float fmodf(float, float);
float roundf(float);
@@ -418,6 +374,7 @@ long double exp2l(long double);
long double expl(long double);
long double expm1l(long double);
long double fabsl(long double) __pure2;
+__BIONIC_MATH_INLINE(long double fabsl(long double x) { return __builtin_fabsl(x); })
long double fdiml(long double, long double);
long double floorl(long double);
long double fmal(long double, long double, long double);
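The __BIONIC_MATH_INLINE wrappers lean on GNU89 extern-inline semantics: with gnu_inline, an extern inline function is pure inlining material and never emits an out-of-line definition, so ordinary callers get __builtin_fabs() directly while the address-taken symbol still comes from libm's fabs.cpp (compiled with -D__BIONIC_NO_MATH_INLINES so the wrapper vanishes there). A minimal sketch of that header/library split, with hypothetical file and function names:

    /* mymath.h -- what clients include */
    double myfabs(double);
    #if !defined(__BIONIC_NO_MATH_INLINES)
    extern __inline__
    __attribute__((gnu_inline, __always_inline__, __artificial__))
    double myfabs(double x) { return __builtin_fabs(x); }
    #endif

    /* mymath.c -- built with -D__BIONIC_NO_MATH_INLINES, so only the
     * declaration above is seen and this provides the real symbol. */
    #include "mymath.h"
    double myfabs(double x) { return __builtin_fabs(x); }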
diff --git a/libm/upstream-freebsd/lib/msun/ld128/k_expl.h b/libm/upstream-freebsd/lib/msun/ld128/k_expl.h
index a5668fd..e843d43 100644
--- a/libm/upstream-freebsd/lib/msun/ld128/k_expl.h
+++ b/libm/upstream-freebsd/lib/msun/ld128/k_expl.h
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/ld128/k_expl.h 275819 2014-12-16 09:21:56Z ed $");
/*
* ld128 version of k_expl.h. See ../ld80/s_expl.c for most comments.
@@ -322,7 +322,7 @@ __ldexp_cexpl(long double complex z, int expt)
scale2 = 1;
SET_LDBL_EXPSIGN(scale1, BIAS + expt - half_expt);
- return (cpackl(cos(y) * exp_x * scale1 * scale2,
+ return (CMPLXL(cos(y) * exp_x * scale1 * scale2,
sinl(y) * exp_x * scale1 * scale2));
}
#endif /* _COMPLEX_H */
diff --git a/libm/upstream-freebsd/lib/msun/src/catrig.c b/libm/upstream-freebsd/lib/msun/src/catrig.c
index 200977c..050a88b 100644
--- a/libm/upstream-freebsd/lib/msun/src/catrig.c
+++ b/libm/upstream-freebsd/lib/msun/src/catrig.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/catrig.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <float.h>
@@ -286,19 +286,19 @@ casinh(double complex z)
if (isnan(x) || isnan(y)) {
/* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */
if (isinf(x))
- return (cpack(x, y + y));
+ return (CMPLX(x, y + y));
/* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */
if (isinf(y))
- return (cpack(y, x + x));
+ return (CMPLX(y, x + x));
/* casinh(NaN + I*0) = NaN + I*0 */
if (y == 0)
- return (cpack(x + x, y));
+ return (CMPLX(x + x, y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
- return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
@@ -307,7 +307,7 @@ casinh(double complex z)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
- return (cpack(copysign(creal(w), x), copysign(cimag(w), y)));
+ return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y)));
}
/* Avoid spuriously raising inexact for z = 0. */
@@ -325,7 +325,7 @@ casinh(double complex z)
ry = asin(B);
else
ry = atan2(new_y, sqrt_A2my2);
- return (cpack(copysign(rx, x), copysign(ry, y)));
+ return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
@@ -335,9 +335,9 @@ casinh(double complex z)
double complex
casin(double complex z)
{
- double complex w = casinh(cpack(cimag(z), creal(z)));
+ double complex w = casinh(CMPLX(cimag(z), creal(z)));
- return (cpack(cimag(w), creal(w)));
+ return (CMPLX(cimag(w), creal(w)));
}
/*
@@ -370,19 +370,19 @@ cacos(double complex z)
if (isnan(x) || isnan(y)) {
/* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */
if (isinf(x))
- return (cpack(y + y, -INFINITY));
+ return (CMPLX(y + y, -INFINITY));
/* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */
if (isinf(y))
- return (cpack(x + x, -y));
+ return (CMPLX(x + x, -y));
/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
if (x == 0)
- return (cpack(pio2_hi + pio2_lo, y + y));
+ return (CMPLX(pio2_hi + pio2_lo, y + y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
- return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
@@ -392,18 +392,18 @@ cacos(double complex z)
ry = creal(w) + m_ln2;
if (sy == 0)
ry = -ry;
- return (cpack(rx, ry));
+ return (CMPLX(rx, ry));
}
/* Avoid spuriously raising inexact for z = 1. */
if (x == 1 && y == 0)
- return (cpack(0, -y));
+ return (CMPLX(0, -y));
/* All remaining cases are inexact. */
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
- return (cpack(pio2_hi - (x - pio2_lo), -y));
+ return (CMPLX(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
if (B_is_usable) {
@@ -419,7 +419,7 @@ cacos(double complex z)
}
if (sy == 0)
ry = -ry;
- return (cpack(rx, ry));
+ return (CMPLX(rx, ry));
}
/*
@@ -437,15 +437,15 @@ cacosh(double complex z)
ry = cimag(w);
/* cacosh(NaN + I*NaN) = NaN + I*NaN */
if (isnan(rx) && isnan(ry))
- return (cpack(ry, rx));
+ return (CMPLX(ry, rx));
/* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */
/* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */
if (isnan(rx))
- return (cpack(fabs(ry), rx));
+ return (CMPLX(fabs(ry), rx));
/* cacosh(0 + I*NaN) = NaN + I*NaN */
if (isnan(ry))
- return (cpack(ry, ry));
- return (cpack(fabs(ry), copysign(rx, cimag(z))));
+ return (CMPLX(ry, ry));
+ return (CMPLX(fabs(ry), copysign(rx, cimag(z))));
}
/*
@@ -475,16 +475,16 @@ clog_for_large_values(double complex z)
* this method is still poor since it is uneccessarily slow.
*/
if (ax > DBL_MAX / 2)
- return (cpack(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x)));
+ return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x)));
/*
* Avoid overflow when x or y is large. Avoid underflow when x or
* y is small.
*/
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
- return (cpack(log(hypot(x, y)), atan2(y, x)));
+ return (CMPLX(log(hypot(x, y)), atan2(y, x)));
- return (cpack(log(ax * ax + ay * ay) / 2, atan2(y, x)));
+ return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x)));
}
/*
@@ -575,30 +575,30 @@ catanh(double complex z)
/* This helps handle many cases. */
if (y == 0 && ax <= 1)
- return (cpack(atanh(x), y));
+ return (CMPLX(atanh(x), y));
/* To ensure the same accuracy as atan(), and to filter out z = 0. */
if (x == 0)
- return (cpack(x, atan(y)));
+ return (CMPLX(x, atan(y)));
if (isnan(x) || isnan(y)) {
/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
if (isinf(x))
- return (cpack(copysign(0, x), y + y));
+ return (CMPLX(copysign(0, x), y + y));
/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
if (isinf(y))
- return (cpack(copysign(0, x),
+ return (CMPLX(copysign(0, x),
copysign(pio2_hi + pio2_lo, y)));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
- return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
- return (cpack(real_part_reciprocal(x, y),
+ return (CMPLX(real_part_reciprocal(x, y),
copysign(pio2_hi + pio2_lo, y)));
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
@@ -623,7 +623,7 @@ catanh(double complex z)
else
ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
- return (cpack(copysign(rx, x), copysign(ry, y)));
+ return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
@@ -633,7 +633,7 @@ catanh(double complex z)
double complex
catan(double complex z)
{
- double complex w = catanh(cpack(cimag(z), creal(z)));
+ double complex w = catanh(CMPLX(cimag(z), creal(z)));
- return (cpack(cimag(w), creal(w)));
+ return (CMPLX(cimag(w), creal(w)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/catrigf.c b/libm/upstream-freebsd/lib/msun/src/catrigf.c
index 08ebef7..e057d31 100644
--- a/libm/upstream-freebsd/lib/msun/src/catrigf.c
+++ b/libm/upstream-freebsd/lib/msun/src/catrigf.c
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/catrigf.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <float.h>
@@ -156,12 +156,12 @@ casinhf(float complex z)
if (isnan(x) || isnan(y)) {
if (isinf(x))
- return (cpackf(x, y + y));
+ return (CMPLXF(x, y + y));
if (isinf(y))
- return (cpackf(y, x + x));
+ return (CMPLXF(y, x + x));
if (y == 0)
- return (cpackf(x + x, y));
- return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLXF(x + x, y));
+ return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
@@ -169,7 +169,7 @@ casinhf(float complex z)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
- return (cpackf(copysignf(crealf(w), x),
+ return (CMPLXF(copysignf(crealf(w), x),
copysignf(cimagf(w), y)));
}
@@ -186,15 +186,15 @@ casinhf(float complex z)
ry = asinf(B);
else
ry = atan2f(new_y, sqrt_A2my2);
- return (cpackf(copysignf(rx, x), copysignf(ry, y)));
+ return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
float complex
casinf(float complex z)
{
- float complex w = casinhf(cpackf(cimagf(z), crealf(z)));
+ float complex w = casinhf(CMPLXF(cimagf(z), crealf(z)));
- return (cpackf(cimagf(w), crealf(w)));
+ return (CMPLXF(cimagf(w), crealf(w)));
}
float complex
@@ -214,12 +214,12 @@ cacosf(float complex z)
if (isnan(x) || isnan(y)) {
if (isinf(x))
- return (cpackf(y + y, -INFINITY));
+ return (CMPLXF(y + y, -INFINITY));
if (isinf(y))
- return (cpackf(x + x, -y));
+ return (CMPLXF(x + x, -y));
if (x == 0)
- return (cpackf(pio2_hi + pio2_lo, y + y));
- return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLXF(pio2_hi + pio2_lo, y + y));
+ return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
@@ -228,16 +228,16 @@ cacosf(float complex z)
ry = crealf(w) + m_ln2;
if (sy == 0)
ry = -ry;
- return (cpackf(rx, ry));
+ return (CMPLXF(rx, ry));
}
if (x == 1 && y == 0)
- return (cpackf(0, -y));
+ return (CMPLXF(0, -y));
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
- return (cpackf(pio2_hi - (x - pio2_lo), -y));
+ return (CMPLXF(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
if (B_is_usable) {
@@ -253,7 +253,7 @@ cacosf(float complex z)
}
if (sy == 0)
ry = -ry;
- return (cpackf(rx, ry));
+ return (CMPLXF(rx, ry));
}
float complex
@@ -266,12 +266,12 @@ cacoshf(float complex z)
rx = crealf(w);
ry = cimagf(w);
if (isnan(rx) && isnan(ry))
- return (cpackf(ry, rx));
+ return (CMPLXF(ry, rx));
if (isnan(rx))
- return (cpackf(fabsf(ry), rx));
+ return (CMPLXF(fabsf(ry), rx));
if (isnan(ry))
- return (cpackf(ry, ry));
- return (cpackf(fabsf(ry), copysignf(rx, cimagf(z))));
+ return (CMPLXF(ry, ry));
+ return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z))));
}
static float complex
@@ -291,13 +291,13 @@ clog_for_large_values(float complex z)
}
if (ax > FLT_MAX / 2)
- return (cpackf(logf(hypotf(x / m_e, y / m_e)) + 1,
+ return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1,
atan2f(y, x)));
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
- return (cpackf(logf(hypotf(x, y)), atan2f(y, x)));
+ return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x)));
- return (cpackf(logf(ax * ax + ay * ay) / 2, atan2f(y, x)));
+ return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x)));
}
static inline float
@@ -346,22 +346,22 @@ catanhf(float complex z)
ay = fabsf(y);
if (y == 0 && ax <= 1)
- return (cpackf(atanhf(x), y));
+ return (CMPLXF(atanhf(x), y));
if (x == 0)
- return (cpackf(x, atanf(y)));
+ return (CMPLXF(x, atanf(y)));
if (isnan(x) || isnan(y)) {
if (isinf(x))
- return (cpackf(copysignf(0, x), y + y));
+ return (CMPLXF(copysignf(0, x), y + y));
if (isinf(y))
- return (cpackf(copysignf(0, x),
+ return (CMPLXF(copysignf(0, x),
copysignf(pio2_hi + pio2_lo, y)));
- return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
+ return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
- return (cpackf(real_part_reciprocal(x, y),
+ return (CMPLXF(real_part_reciprocal(x, y),
copysignf(pio2_hi + pio2_lo, y)));
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
@@ -381,13 +381,13 @@ catanhf(float complex z)
else
ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
- return (cpackf(copysignf(rx, x), copysignf(ry, y)));
+ return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
float complex
catanf(float complex z)
{
- float complex w = catanhf(cpackf(cimagf(z), crealf(z)));
+ float complex w = catanhf(CMPLXF(cimagf(z), crealf(z)));
- return (cpackf(cimagf(w), crealf(w)));
+ return (CMPLXF(cimagf(w), crealf(w)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/e_j0.c b/libm/upstream-freebsd/lib/msun/src/e_j0.c
index 8320f25..36e72c2 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_j0.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_j0.c
@@ -12,7 +12,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_j0.c 283032 2015-05-17 16:27:06Z kargl $");
/* __ieee754_j0(x), __ieee754_y0(x)
* Bessel function of the first and second kinds of order zero.
@@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
-static double pzero(double), qzero(double);
+static __inline double pzero(double), qzero(double);
+
+static const volatile double vone = 1, vzero = 0;
static const double
huge = 1e300,
@@ -115,7 +117,7 @@ __ieee754_j0(double x)
if(ix<0x3f200000) { /* |x| < 2**-13 */
if(huge+x>one) { /* raise inexact if x != 0 */
if(ix<0x3e400000) return one; /* |x|<2**-27 */
- else return one - 0.25*x*x;
+ else return one - x*x/4;
}
}
z = x*x;
@@ -150,10 +152,16 @@ __ieee754_y0(double x)
EXTRACT_WORDS(hx,lx,x);
ix = 0x7fffffff&hx;
- /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */
- if(ix>=0x7ff00000) return one/(x+x*x);
- if((ix|lx)==0) return -one/zero;
- if(hx<0) return zero/zero;
+ /*
+ * y0(NaN) = NaN.
+ * y0(Inf) = 0.
+ * y0(-Inf) = NaN and raise invalid exception.
+ */
+ if(ix>=0x7ff00000) return vone/(x+x*x);
+ /* y0(+-0) = -inf and raise divide-by-zero exception. */
+ if((ix|lx)==0) return -one/vzero;
+ /* y0(x<0) = NaN and raise invalid exception. */
+ if(hx<0) return vzero/vzero;
if(ix >= 0x40000000) { /* |x| >= 2.0 */
/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
* where x0 = x-pi/4
@@ -268,7 +276,8 @@ static const double pS2[5] = {
1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */
};
- static double pzero(double x)
+static __inline double
+pzero(double x)
{
const double *p,*q;
double z,r,s;
@@ -278,7 +287,7 @@ static const double pS2[5] = {
if(ix>=0x40200000) {p = pR8; q= pS8;}
else if(ix>=0x40122E8B){p = pR5; q= pS5;}
else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
- else if(ix>=0x40000000){p = pR2; q= pS2;}
+ else {p = pR2; q= pS2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -363,7 +372,8 @@ static const double qS2[6] = {
-5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */
};
- static double qzero(double x)
+static __inline double
+qzero(double x)
{
const double *p,*q;
double s,r,z;
@@ -373,7 +383,7 @@ static const double qS2[6] = {
if(ix>=0x40200000) {p = qR8; q= qS8;}
else if(ix>=0x40122E8B){p = qR5; q= qS5;}
else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
- else if(ix>=0x40000000){p = qR2; q= qS2;}
+ else {p = qR2; q= qS2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
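The vone/vzero idiom deserves a note: with plain constants, expressions like -one/zero are folded at compile time and no floating-point exception is ever raised at run time. Declaring the constants volatile forces the division to actually execute, so y0(+-0) raises divide-by-zero (returning -Inf) and y0(x<0) raises invalid (returning NaN), as the new comments state. A minimal sketch of the effect (strictly conforming code would also want #pragma STDC FENV_ACCESS ON):

    #include <fenv.h>
    #include <stdio.h>

    static const double one = 1;
    static const volatile double vzero = 0;

    int main(void) {
      feclearexcept(FE_ALL_EXCEPT);
      double r = -one / vzero;   /* real runtime divide: -Inf, FE_DIVBYZERO */
      printf("r = %f, divide-by-zero raised: %d\n",
             r, fetestexcept(FE_DIVBYZERO) != 0);
      /* vzero / vzero would likewise return NaN and raise FE_INVALID. */
      return 0;
    }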
diff --git a/libm/upstream-freebsd/lib/msun/src/e_j0f.c b/libm/upstream-freebsd/lib/msun/src/e_j0f.c
index c45faf3..e53b218 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_j0f.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_j0f.c
@@ -14,12 +14,18 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_j0f.c 283032 2015-05-17 16:27:06Z kargl $");
+
+/*
+ * See e_j0.c for complete comments.
+ */
#include "math.h"
#include "math_private.h"
-static float pzerof(float), qzerof(float);
+static __inline float pzerof(float), qzerof(float);
+
+static const volatile float vone = 1, vzero = 0;
static const float
huge = 1e30,
@@ -62,17 +68,17 @@ __ieee754_j0f(float x)
* j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
* y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
*/
- if(ix>0x80000000) z = (invsqrtpi*cc)/sqrtf(x);
+ if(ix>0x58000000) z = (invsqrtpi*cc)/sqrtf(x); /* |x|>2**49 */
else {
u = pzerof(x); v = qzerof(x);
z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
}
return z;
}
- if(ix<0x39000000) { /* |x| < 2**-13 */
+ if(ix<0x3b000000) { /* |x| < 2**-9 */
if(huge+x>one) { /* raise inexact if x != 0 */
- if(ix<0x32000000) return one; /* |x|<2**-27 */
- else return one - (float)0.25*x*x;
+ if(ix<0x39800000) return one; /* |x|<2**-12 */
+ else return one - x*x/4;
}
}
z = x*x;
@@ -107,10 +113,9 @@ __ieee754_y0f(float x)
GET_FLOAT_WORD(hx,x);
ix = 0x7fffffff&hx;
- /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */
- if(ix>=0x7f800000) return one/(x+x*x);
- if(ix==0) return -one/zero;
- if(hx<0) return zero/zero;
+ if(ix>=0x7f800000) return vone/(x+x*x);
+ if(ix==0) return -one/vzero;
+ if(hx<0) return vzero/vzero;
if(ix >= 0x40000000) { /* |x| >= 2.0 */
/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
* where x0 = x-pi/4
@@ -136,14 +141,14 @@ __ieee754_y0f(float x)
if ((s*c)<zero) cc = z/ss;
else ss = z/cc;
}
- if(ix>0x80000000) z = (invsqrtpi*ss)/sqrtf(x);
+ if(ix>0x58000000) z = (invsqrtpi*ss)/sqrtf(x); /* |x|>2**49 */
else {
u = pzerof(x); v = qzerof(x);
z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
}
return z;
}
- if(ix<=0x32000000) { /* x < 2**-27 */
+ if(ix<=0x39000000) { /* x < 2**-13 */
return(u00 + tpi*__ieee754_logf(x));
}
z = x*x;
@@ -224,7 +229,8 @@ static const float pS2[5] = {
1.4657617569e+01, /* 0x416a859a */
};
- static float pzerof(float x)
+static __inline float
+pzerof(float x)
{
const float *p,*q;
float z,r,s;
@@ -232,9 +238,9 @@ static const float pS2[5] = {
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
if(ix>=0x41000000) {p = pR8; q= pS8;}
- else if(ix>=0x40f71c58){p = pR5; q= pS5;}
- else if(ix>=0x4036db68){p = pR3; q= pS3;}
- else if(ix>=0x40000000){p = pR2; q= pS2;}
+ else if(ix>=0x409173eb){p = pR5; q= pS5;}
+ else if(ix>=0x4036d917){p = pR3; q= pS3;}
+ else {p = pR2; q= pS2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -319,7 +325,8 @@ static const float qS2[6] = {
-5.3109550476e+00, /* 0xc0a9f358 */
};
- static float qzerof(float x)
+static __inline float
+qzerof(float x)
{
const float *p,*q;
float s,r,z;
@@ -327,9 +334,9 @@ static const float qS2[6] = {
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
if(ix>=0x41000000) {p = qR8; q= qS8;}
- else if(ix>=0x40f71c58){p = qR5; q= qS5;}
- else if(ix>=0x4036db68){p = qR3; q= qS3;}
- else if(ix>=0x40000000){p = qR2; q= qS2;}
+ else if(ix>=0x409173eb){p = qR5; q= qS5;}
+ else if(ix>=0x4036d917){p = qR3; q= qS3;}
+ else {p = qR2; q= qS2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
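Beyond the same vzero rework, the float thresholds are retuned; the hex cutoffs are simply single-precision bit patterns for powers of two (for 2**n, the high word is (127+n) << 23). A small check of the constants used above, assuming C99 hex float literals:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t float_word(float f) {
      uint32_t w;
      memcpy(&w, &f, sizeof w);   /* well-defined type pun */
      return w;
    }

    int main(void) {
      assert(float_word(0x1p-9f)  == 0x3b000000);  /* new small-x cutoff  */
      assert(float_word(0x1p-12f) == 0x39800000);  /* new tiny-x cutoff   */
      assert(float_word(0x1p49f)  == 0x58000000);  /* |x| > 2**49 fast path */
      return 0;
    }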
diff --git a/libm/upstream-freebsd/lib/msun/src/e_j1.c b/libm/upstream-freebsd/lib/msun/src/e_j1.c
index 63800ad..b11ac2d 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_j1.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_j1.c
@@ -12,7 +12,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_j1.c 283032 2015-05-17 16:27:06Z kargl $");
/* __ieee754_j1(x), __ieee754_y1(x)
* Bessel function of the first and second kinds of order zero.
@@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
-static double pone(double), qone(double);
+static __inline double pone(double), qone(double);
+
+static const volatile double vone = 1, vzero = 0;
static const double
huge = 1e300,
@@ -147,10 +149,16 @@ __ieee754_y1(double x)
EXTRACT_WORDS(hx,lx,x);
ix = 0x7fffffff&hx;
- /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
- if(ix>=0x7ff00000) return one/(x+x*x);
- if((ix|lx)==0) return -one/zero;
- if(hx<0) return zero/zero;
+ /*
+ * y1(NaN) = NaN.
+ * y1(Inf) = 0.
+ * y1(-Inf) = NaN and raise invalid exception.
+ */
+ if(ix>=0x7ff00000) return vone/(x+x*x);
+ /* y1(+-0) = -inf and raise divide-by-zero exception. */
+ if((ix|lx)==0) return -one/vzero;
+ /* y1(x<0) = NaN and raise invalid exception. */
+ if(hx<0) return vzero/vzero;
if(ix >= 0x40000000) { /* |x| >= 2.0 */
s = sin(x);
c = cos(x);
@@ -262,7 +270,8 @@ static const double ps2[5] = {
8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */
};
- static double pone(double x)
+static __inline double
+pone(double x)
{
const double *p,*q;
double z,r,s;
@@ -272,7 +281,7 @@ static const double ps2[5] = {
if(ix>=0x40200000) {p = pr8; q= ps8;}
else if(ix>=0x40122E8B){p = pr5; q= ps5;}
else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
- else if(ix>=0x40000000){p = pr2; q= ps2;}
+ else {p = pr2; q= ps2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -358,7 +367,8 @@ static const double qs2[6] = {
-4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */
};
- static double qone(double x)
+static __inline double
+qone(double x)
{
const double *p,*q;
double s,r,z;
@@ -368,7 +378,7 @@ static const double qs2[6] = {
if(ix>=0x40200000) {p = qr8; q= qs8;}
else if(ix>=0x40122E8B){p = qr5; q= qs5;}
else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
- else if(ix>=0x40000000){p = qr2; q= qs2;}
+ else {p = qr2; q= qs2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
diff --git a/libm/upstream-freebsd/lib/msun/src/e_j1f.c b/libm/upstream-freebsd/lib/msun/src/e_j1f.c
index 88e2d83..0cca823 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_j1f.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_j1f.c
@@ -14,12 +14,18 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_j1f.c 283032 2015-05-17 16:27:06Z kargl $");
+
+/*
+ * See e_j1.c for complete comments.
+ */
#include "math.h"
#include "math_private.h"
-static float ponef(float), qonef(float);
+static __inline float ponef(float), qonef(float);
+
+static const volatile float vone = 1, vzero = 0;
static const float
huge = 1e30,
@@ -63,7 +69,7 @@ __ieee754_j1f(float x)
* j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
* y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
*/
- if(ix>0x80000000) z = (invsqrtpi*cc)/sqrtf(y);
+ if(ix>0x58000000) z = (invsqrtpi*cc)/sqrtf(y); /* |x|>2**49 */
else {
u = ponef(y); v = qonef(y);
z = invsqrtpi*(u*cc-v*ss)/sqrtf(y);
@@ -71,7 +77,7 @@ __ieee754_j1f(float x)
if(hx<0) return -z;
else return z;
}
- if(ix<0x32000000) { /* |x|<2**-27 */
+ if(ix<0x39000000) { /* |x|<2**-13 */
if(huge+x>one) return (float)0.5*x;/* inexact if x!=0 necessary */
}
z = x*x;
@@ -104,10 +110,9 @@ __ieee754_y1f(float x)
GET_FLOAT_WORD(hx,x);
ix = 0x7fffffff&hx;
- /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
- if(ix>=0x7f800000) return one/(x+x*x);
- if(ix==0) return -one/zero;
- if(hx<0) return zero/zero;
+ if(ix>=0x7f800000) return vone/(x+x*x);
+ if(ix==0) return -one/vzero;
+ if(hx<0) return vzero/vzero;
if(ix >= 0x40000000) { /* |x| >= 2.0 */
s = sinf(x);
c = cosf(x);
@@ -129,14 +134,14 @@ __ieee754_y1f(float x)
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.
*/
- if(ix>0x48000000) z = (invsqrtpi*ss)/sqrtf(x);
+ if(ix>0x58000000) z = (invsqrtpi*ss)/sqrtf(x); /* |x|>2**49 */
else {
u = ponef(x); v = qonef(x);
z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
}
return z;
}
- if(ix<=0x24800000) { /* x < 2**-54 */
+ if(ix<=0x33000000) { /* x < 2**-25 */
return(-tpi/x);
}
z = x*x;
@@ -219,7 +224,8 @@ static const float ps2[5] = {
8.3646392822e+00, /* 0x4105d590 */
};
- static float ponef(float x)
+static __inline float
+ponef(float x)
{
const float *p,*q;
float z,r,s;
@@ -227,9 +233,9 @@ static const float ps2[5] = {
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
if(ix>=0x41000000) {p = pr8; q= ps8;}
- else if(ix>=0x40f71c58){p = pr5; q= ps5;}
- else if(ix>=0x4036db68){p = pr3; q= ps3;}
- else if(ix>=0x40000000){p = pr2; q= ps2;}
+ else if(ix>=0x409173eb){p = pr5; q= ps5;}
+ else if(ix>=0x4036d917){p = pr3; q= ps3;}
+ else {p = pr2; q= ps2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -315,17 +321,18 @@ static const float qs2[6] = {
-4.9594988823e+00, /* 0xc09eb437 */
};
- static float qonef(float x)
+static __inline float
+qonef(float x)
{
const float *p,*q;
float s,r,z;
int32_t ix;
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
- if(ix>=0x40200000) {p = qr8; q= qs8;}
- else if(ix>=0x40f71c58){p = qr5; q= qs5;}
- else if(ix>=0x4036db68){p = qr3; q= qs3;}
- else if(ix>=0x40000000){p = qr2; q= qs2;}
+ if(ix>=0x41000000) {p = qr8; q= qs8;}
+ else if(ix>=0x409173eb){p = qr5; q= qs5;}
+ else if(ix>=0x4036d917){p = qr3; q= qs3;}
+ else {p = qr2; q= qs2;} /* ix>=0x40000000 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
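One change in this file is more than retuning: qonef's first cutoff moves from 0x40200000 to 0x41000000. As a float bit pattern 0x40200000 is 2.5f (the same hex value means 8.0 in a double's high word, which is where it appears in e_j1.c), so the old test appears to have selected the x>=8 coefficients far too early; 0x41000000 is 8.0f and matches ponef. A tiny check of the two patterns:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static float from_bits(uint32_t w) {
      float f;
      memcpy(&f, &w, sizeof f);
      return f;
    }

    int main(void) {
      assert(from_bits(0x40200000) == 2.5f);  /* old cutoff, double's 8.0 pattern */
      assert(from_bits(0x41000000) == 8.0f);  /* corrected cutoff */
      return 0;
    }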
diff --git a/libm/upstream-freebsd/lib/msun/src/e_jn.c b/libm/upstream-freebsd/lib/msun/src/e_jn.c
index 8b0bc62..a1130c5 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_jn.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_jn.c
@@ -12,7 +12,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_jn.c 279856 2015-03-10 17:10:54Z kargl $");
/*
* __ieee754_jn(n, x), __ieee754_yn(n, x)
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
+static const volatile double vone = 1, vzero = 0;
+
static const double
invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
@@ -220,10 +222,12 @@ __ieee754_yn(int n, double x)
EXTRACT_WORDS(hx,lx,x);
ix = 0x7fffffff&hx;
- /* if Y(n,NaN) is NaN */
+ /* yn(n,NaN) = NaN */
if((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000) return x+x;
- if((ix|lx)==0) return -one/zero;
- if(hx<0) return zero/zero;
+ /* yn(n,+-0) = -inf and raise divide-by-zero exception. */
+ if((ix|lx)==0) return -one/vzero;
+ /* yn(n,x<0) = NaN and raise invalid exception. */
+ if(hx<0) return vzero/vzero;
sign = 1;
if(n<0){
n = -n;
diff --git a/libm/upstream-freebsd/lib/msun/src/e_jnf.c b/libm/upstream-freebsd/lib/msun/src/e_jnf.c
index f564aec..c82d5cf 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_jnf.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_jnf.c
@@ -14,11 +14,17 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/e_jnf.c 279856 2015-03-10 17:10:54Z kargl $");
+
+/*
+ * See e_jn.c for complete comments.
+ */
#include "math.h"
#include "math_private.h"
+static const volatile float vone = 1, vzero = 0;
+
static const float
two = 2.0000000000e+00, /* 0x40000000 */
one = 1.0000000000e+00; /* 0x3F800000 */
@@ -172,10 +178,9 @@ __ieee754_ynf(int n, float x)
GET_FLOAT_WORD(hx,x);
ix = 0x7fffffff&hx;
- /* if Y(n,NaN) is NaN */
if(ix>0x7f800000) return x+x;
- if(ix==0) return -one/zero;
- if(hx<0) return zero/zero;
+ if(ix==0) return -one/vzero;
+ if(hx<0) return vzero/vzero;
sign = 1;
if(n<0){
n = -n;
diff --git a/libm/upstream-freebsd/lib/msun/src/k_exp.c b/libm/upstream-freebsd/lib/msun/src/k_exp.c
index f592f69..5aa3ef3 100644
--- a/libm/upstream-freebsd/lib/msun/src/k_exp.c
+++ b/libm/upstream-freebsd/lib/msun/src/k_exp.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/k_exp.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
@@ -103,6 +103,6 @@ __ldexp_cexp(double complex z, int expt)
half_expt = expt - half_expt;
INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0);
- return (cpack(cos(y) * exp_x * scale1 * scale2,
+ return (CMPLX(cos(y) * exp_x * scale1 * scale2,
sin(y) * exp_x * scale1 * scale2));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/k_expf.c b/libm/upstream-freebsd/lib/msun/src/k_expf.c
index 548a008..8fe8c46 100644
--- a/libm/upstream-freebsd/lib/msun/src/k_expf.c
+++ b/libm/upstream-freebsd/lib/msun/src/k_expf.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/k_expf.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
@@ -82,6 +82,6 @@ __ldexp_cexpf(float complex z, int expt)
half_expt = expt - half_expt;
SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23);
- return (cpackf(cosf(y) * exp_x * scale1 * scale2,
+ return (CMPLXF(cosf(y) * exp_x * scale1 * scale2,
sinf(y) * exp_x * scale1 * scale2));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h
index 8af2c65..1f10e8b 100644
--- a/libm/upstream-freebsd/lib/msun/src/math_private.h
+++ b/libm/upstream-freebsd/lib/msun/src/math_private.h
@@ -11,7 +11,7 @@
/*
* from: @(#)fdlibm.h 5.1 93/09/24
- * $FreeBSD$
+ * $FreeBSD: head/lib/msun/src/math_private.h 276176 2014-12-24 10:13:53Z ed $
*/
#ifndef _MATH_PRIVATE_H_
@@ -454,9 +454,15 @@ typedef union {
* (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
* In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
* to -0.0+I*0.0.
+ *
+ * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
+ * to construct complex values. Compilers that conform to the C99
+ * standard require the following functions to avoid the above issues.
*/
+
+#ifndef CMPLXF
static __inline float complex
-cpackf(float x, float y)
+CMPLXF(float x, float y)
{
float_complex z;
@@ -464,9 +470,11 @@ cpackf(float x, float y)
IMAGPART(z) = y;
return (z.f);
}
+#endif
+#ifndef CMPLX
static __inline double complex
-cpack(double x, double y)
+CMPLX(double x, double y)
{
double_complex z;
@@ -474,9 +482,11 @@ cpack(double x, double y)
IMAGPART(z) = y;
return (z.f);
}
+#endif
+#ifndef CMPLXL
static __inline long double complex
-cpackl(long double x, long double y)
+CMPLXL(long double x, long double y)
{
long_double_complex z;
@@ -484,6 +494,8 @@ cpackl(long double x, long double y)
IMAGPART(z) = y;
return (z.f);
}
+#endif
+
#endif /* _COMPLEX_H */
#ifdef __GNUCLIKE_ASM
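This hunk carries the rationale for the whole cpack -> CMPLX rename: CMPLX() (C11) builds a complex value componentwise, whereas the arithmetic x + I*y can corrupt special values because I is a complex constant in compilers without imaginary types. The #ifndef guards keep the old union-based builders as fallbacks under the new names when the compiler's <complex.h> does not supply the macros. A short demonstration of the corruption the comment describes, assuming a C11 <complex.h> that provides CMPLX:

    #include <complex.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double complex a = CMPLX(0.0, INFINITY);  /* exactly 0 + Inf*I */
      /* Mixed real*complex multiplies both parts, and Inf * 0 is NaN,
       * so the real part is typically corrupted to NaN: */
      double complex b = 0.0 + INFINITY * I;
      printf("a = %g%+gi\n", creal(a), cimag(a));
      printf("b = %g%+gi\n", creal(b), cimag(b));
      return 0;
    }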
diff --git a/libm/upstream-freebsd/lib/msun/src/s_ccosh.c b/libm/upstream-freebsd/lib/msun/src/s_ccosh.c
index 9ea962b..e544e91 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_ccosh.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_ccosh.c
@@ -32,10 +32,12 @@
*
* Exceptional values are noted in the comments within the source code.
* These values and the return value were taken from n1124.pdf.
+ * The sign of the result for some exceptional values is unspecified but
+ * must satisfy both cosh(conj(z)) == conj(cosh(z)) and cosh(-z) == cosh(z).
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_ccosh.c 284423 2015-06-15 20:11:06Z tijl $");
#include <complex.h>
#include <math.h>
@@ -62,49 +64,48 @@ ccosh(double complex z)
/* Handle the nearly-non-exceptional cases where x and y are finite. */
if (ix < 0x7ff00000 && iy < 0x7ff00000) {
if ((iy | ly) == 0)
- return (cpack(cosh(x), x * y));
- if (ix < 0x40360000) /* small x: normal case */
- return (cpack(cosh(x) * cos(y), sinh(x) * sin(y)));
+ return (CMPLX(cosh(x), x * y));
+ if (ix < 0x40360000) /* |x| < 22: normal case */
+ return (CMPLX(cosh(x) * cos(y), sinh(x) * sin(y)));
/* |x| >= 22, so cosh(x) ~= exp(|x|) */
if (ix < 0x40862e42) {
/* x < 710: exp(|x|) won't overflow */
h = exp(fabs(x)) * 0.5;
- return (cpack(h * cos(y), copysign(h, x) * sin(y)));
+ return (CMPLX(h * cos(y), copysign(h, x) * sin(y)));
} else if (ix < 0x4096bbaa) {
/* x < 1455: scale to avoid overflow */
- z = __ldexp_cexp(cpack(fabs(x), y), -1);
- return (cpack(creal(z), cimag(z) * copysign(1, x)));
+ z = __ldexp_cexp(CMPLX(fabs(x), y), -1);
+ return (CMPLX(creal(z), cimag(z) * copysign(1, x)));
} else {
/* x >= 1455: the result always overflows */
h = huge * x;
- return (cpack(h * h * cos(y), h * sin(y)));
+ return (CMPLX(h * h * cos(y), h * sin(y)));
}
}
/*
- * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0.
- * The sign of 0 in the result is unspecified. Choice = normally
- * the same as dNaN. Raise the invalid floating-point exception.
+ * cosh(+-0 +- I Inf) = dNaN + I (+-)(+-)0.
+ * The sign of 0 in the result is unspecified. Choice = product
+ * of the signs of the argument. Raise the invalid floating-point
+ * exception.
*
- * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0.
- * The sign of 0 in the result is unspecified. Choice = normally
- * the same as d(NaN).
+ * cosh(+-0 +- I NaN) = d(NaN) + I (+-)(+-)0.
+ * The sign of 0 in the result is unspecified. Choice = product
+ * of the signs of the argument.
*/
- if ((ix | lx) == 0 && iy >= 0x7ff00000)
- return (cpack(y - y, copysign(0, x * (y - y))));
+ if ((ix | lx) == 0) /* && iy >= 0x7ff00000 */
+ return (CMPLX(y - y, x * copysign(0, y)));
/*
* cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0.
*
- * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0.
- * The sign of 0 in the result is unspecified.
+ * cosh(NaN +- I 0) = d(NaN) + I (+-)(+-)0.
+ * The sign of 0 in the result is unspecified. Choice = product
+ * of the signs of the argument.
*/
- if ((iy | ly) == 0 && ix >= 0x7ff00000) {
- if (((hx & 0xfffff) | lx) == 0)
- return (cpack(x * x, copysign(0, x) * y));
- return (cpack(x * x, copysign(0, (x + x) * y)));
- }
+ if ((iy | ly) == 0) /* && ix >= 0x7ff00000 */
+ return (CMPLX(x * x, copysign(0, x) * y));
/*
* cosh(x +- I Inf) = dNaN + I dNaN.
@@ -114,8 +115,8 @@ ccosh(double complex z)
* Optionally raises the invalid floating-point exception for finite
* nonzero x. Choice = don't raise (except for signaling NaNs).
*/
- if (ix < 0x7ff00000 && iy >= 0x7ff00000)
- return (cpack(y - y, x * (y - y)));
+ if (ix < 0x7ff00000) /* && iy >= 0x7ff00000 */
+ return (CMPLX(y - y, x * (y - y)));
/*
* cosh(+-Inf + I NaN) = +Inf + I d(NaN).
@@ -126,10 +127,10 @@ ccosh(double complex z)
*
* cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y)
*/
- if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+ if (ix == 0x7ff00000 && lx == 0) {
if (iy >= 0x7ff00000)
- return (cpack(x * x, x * (y - y)));
- return (cpack((x * x) * cos(y), x * sin(y)));
+ return (CMPLX(INFINITY, x * (y - y)));
+ return (CMPLX(INFINITY * cos(y), x * sin(y)));
}
/*
@@ -143,7 +144,7 @@ ccosh(double complex z)
* Optionally raises the invalid floating-point exception for finite
* nonzero y. Choice = don't raise (except for signaling NaNs).
*/
- return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+ return (CMPLX((x * x) * (y - y), (x + x) * (y - y)));
}
double complex
@@ -151,5 +152,5 @@ ccos(double complex z)
{
/* ccos(z) = ccosh(I * z) */
- return (ccosh(cpack(-cimag(z), creal(z))));
+ return (ccosh(CMPLX(-cimag(z), creal(z))));
}
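A pattern worth decoding in the rewritten zero branches: for y infinite or NaN, (y - y) produces a quiet NaN (raising invalid for Inf), and x * copysign(0, y) produces a zero whose sign is the product of the signs of x and y, which is exactly the "choice = product of the signs" wording in the updated comments. The sign arithmetic in isolation, as a standalone check:

    #include <assert.h>
    #include <math.h>

    int main(void) {
      assert(signbit(-0.0 * copysign(0.0, INFINITY)));    /* (-0)*(+0) = -0 */
      assert(!signbit(-0.0 * copysign(0.0, -INFINITY)));  /* (-0)*(-0) = +0 */
      assert(isnan(INFINITY - INFINITY));                 /* the (y - y) path */
      return 0;
    }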
diff --git a/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c b/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c
index 1de9ad4..e33840a 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c
@@ -25,11 +25,11 @@
*/
/*
- * Hyperbolic cosine of a complex argument. See s_ccosh.c for details.
+ * Float version of ccosh(). See s_ccosh.c for details.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_ccoshf.c 284423 2015-06-15 20:11:06Z tijl $");
#include <complex.h>
#include <math.h>
@@ -55,50 +55,47 @@ ccoshf(float complex z)
if (ix < 0x7f800000 && iy < 0x7f800000) {
if (iy == 0)
- return (cpackf(coshf(x), x * y));
- if (ix < 0x41100000) /* small x: normal case */
- return (cpackf(coshf(x) * cosf(y), sinhf(x) * sinf(y)));
+ return (CMPLXF(coshf(x), x * y));
+ if (ix < 0x41100000) /* |x| < 9: normal case */
+ return (CMPLXF(coshf(x) * cosf(y), sinhf(x) * sinf(y)));
/* |x| >= 9, so cosh(x) ~= exp(|x|) */
if (ix < 0x42b17218) {
/* x < 88.7: expf(|x|) won't overflow */
- h = expf(fabsf(x)) * 0.5f;
- return (cpackf(h * cosf(y), copysignf(h, x) * sinf(y)));
+ h = expf(fabsf(x)) * 0.5F;
+ return (CMPLXF(h * cosf(y), copysignf(h, x) * sinf(y)));
} else if (ix < 0x4340b1e7) {
/* x < 192.7: scale to avoid overflow */
- z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
- return (cpackf(crealf(z), cimagf(z) * copysignf(1, x)));
+ z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1);
+ return (CMPLXF(crealf(z), cimagf(z) * copysignf(1, x)));
} else {
/* x >= 192.7: the result always overflows */
h = huge * x;
- return (cpackf(h * h * cosf(y), h * sinf(y)));
+ return (CMPLXF(h * h * cosf(y), h * sinf(y)));
}
}
- if (ix == 0 && iy >= 0x7f800000)
- return (cpackf(y - y, copysignf(0, x * (y - y))));
+ if (ix == 0) /* && iy >= 0x7f800000 */
+ return (CMPLXF(y - y, x * copysignf(0, y)));
- if (iy == 0 && ix >= 0x7f800000) {
- if ((hx & 0x7fffff) == 0)
- return (cpackf(x * x, copysignf(0, x) * y));
- return (cpackf(x * x, copysignf(0, (x + x) * y)));
- }
+ if (iy == 0) /* && ix >= 0x7f800000 */
+ return (CMPLXF(x * x, copysignf(0, x) * y));
- if (ix < 0x7f800000 && iy >= 0x7f800000)
- return (cpackf(y - y, x * (y - y)));
+ if (ix < 0x7f800000) /* && iy >= 0x7f800000 */
+ return (CMPLXF(y - y, x * (y - y)));
- if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {
+ if (ix == 0x7f800000) {
if (iy >= 0x7f800000)
- return (cpackf(x * x, x * (y - y)));
- return (cpackf((x * x) * cosf(y), x * sinf(y)));
+ return (CMPLXF(INFINITY, x * (y - y)));
+ return (CMPLXF(INFINITY * cosf(y), x * sinf(y)));
}
- return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+ return (CMPLXF((x * x) * (y - y), (x + x) * (y - y)));
}
float complex
ccosf(float complex z)
{
- return (ccoshf(cpackf(-cimagf(z), crealf(z))));
+ return (ccoshf(CMPLXF(-cimagf(z), crealf(z))));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_cexp.c b/libm/upstream-freebsd/lib/msun/src/s_cexp.c
index abe178f..660a68d 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_cexp.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_cexp.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_cexp.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -50,22 +50,22 @@ cexp(double complex z)
/* cexp(x + I 0) = exp(x) + I 0 */
if ((hy | ly) == 0)
- return (cpack(exp(x), y));
+ return (CMPLX(exp(x), y));
EXTRACT_WORDS(hx, lx, x);
/* cexp(0 + I y) = cos(y) + I sin(y) */
if (((hx & 0x7fffffff) | lx) == 0)
- return (cpack(cos(y), sin(y)));
+ return (CMPLX(cos(y), sin(y)));
if (hy >= 0x7ff00000) {
if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) {
/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
- return (cpack(y - y, y - y));
+ return (CMPLX(y - y, y - y));
} else if (hx & 0x80000000) {
/* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */
- return (cpack(0.0, 0.0));
+ return (CMPLX(0.0, 0.0));
} else {
/* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */
- return (cpack(x, y - y));
+ return (CMPLX(x, y - y));
}
}
@@ -84,6 +84,6 @@ cexp(double complex z)
* - x = NaN (spurious inexact exception from y)
*/
exp_x = exp(x);
- return (cpack(exp_x * cos(y), exp_x * sin(y)));
+ return (CMPLX(exp_x * cos(y), exp_x * sin(y)));
}
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_cexpf.c b/libm/upstream-freebsd/lib/msun/src/s_cexpf.c
index 0e30d08..709ad47 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_cexpf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_cexpf.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_cexpf.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -50,22 +50,22 @@ cexpf(float complex z)
/* cexp(x + I 0) = exp(x) + I 0 */
if (hy == 0)
- return (cpackf(expf(x), y));
+ return (CMPLXF(expf(x), y));
GET_FLOAT_WORD(hx, x);
/* cexp(0 + I y) = cos(y) + I sin(y) */
if ((hx & 0x7fffffff) == 0)
- return (cpackf(cosf(y), sinf(y)));
+ return (CMPLXF(cosf(y), sinf(y)));
if (hy >= 0x7f800000) {
if ((hx & 0x7fffffff) != 0x7f800000) {
/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
- return (cpackf(y - y, y - y));
+ return (CMPLXF(y - y, y - y));
} else if (hx & 0x80000000) {
/* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */
- return (cpackf(0.0, 0.0));
+ return (CMPLXF(0.0, 0.0));
} else {
/* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */
- return (cpackf(x, y - y));
+ return (CMPLXF(x, y - y));
}
}
@@ -84,6 +84,6 @@ cexpf(float complex z)
* - x = NaN (spurious inexact exception from y)
*/
exp_x = expf(x);
- return (cpackf(exp_x * cosf(y), exp_x * sinf(y)));
+ return (CMPLXF(exp_x * cosf(y), exp_x * sinf(y)));
}
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_conj.c b/libm/upstream-freebsd/lib/msun/src/s_conj.c
index 5770c29..61fac63 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_conj.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_conj.c
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/lib/msun/src/s_conj.c 275819 2014-12-16 09:21:56Z ed $
*/
#include <complex.h>
@@ -34,5 +34,5 @@ double complex
conj(double complex z)
{
- return (cpack(creal(z), -cimag(z)));
+ return (CMPLX(creal(z), -cimag(z)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_conjf.c b/libm/upstream-freebsd/lib/msun/src/s_conjf.c
index b090760..83c9ef0 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_conjf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_conjf.c
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/lib/msun/src/s_conjf.c 275819 2014-12-16 09:21:56Z ed $
*/
#include <complex.h>
@@ -34,5 +34,5 @@ float complex
conjf(float complex z)
{
- return (cpackf(crealf(z), -cimagf(z)));
+ return (CMPLXF(crealf(z), -cimagf(z)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_conjl.c b/libm/upstream-freebsd/lib/msun/src/s_conjl.c
index 0e431ef..d9e6a16 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_conjl.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_conjl.c
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/lib/msun/src/s_conjl.c 275819 2014-12-16 09:21:56Z ed $
*/
#include <complex.h>
@@ -34,5 +34,5 @@ long double complex
conjl(long double complex z)
{
- return (cpackl(creall(z), -cimagl(z)));
+ return (CMPLXL(creall(z), -cimagl(z)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_cproj.c b/libm/upstream-freebsd/lib/msun/src/s_cproj.c
index 8e9404c..ec2266e 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_cproj.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_cproj.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_cproj.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -39,7 +39,7 @@ cproj(double complex z)
if (!isinf(creal(z)) && !isinf(cimag(z)))
return (z);
else
- return (cpack(INFINITY, copysign(0.0, cimag(z))));
+ return (CMPLX(INFINITY, copysign(0.0, cimag(z))));
}
#if LDBL_MANT_DIG == 53
diff --git a/libm/upstream-freebsd/lib/msun/src/s_cprojf.c b/libm/upstream-freebsd/lib/msun/src/s_cprojf.c
index 68ea77b..63af75f 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_cprojf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_cprojf.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_cprojf.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -39,5 +39,5 @@ cprojf(float complex z)
if (!isinf(crealf(z)) && !isinf(cimagf(z)))
return (z);
else
- return (cpackf(INFINITY, copysignf(0.0, cimagf(z))));
+ return (CMPLXF(INFINITY, copysignf(0.0, cimagf(z))));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_cprojl.c b/libm/upstream-freebsd/lib/msun/src/s_cprojl.c
index 07385bc..8386f81 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_cprojl.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_cprojl.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_cprojl.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -39,5 +39,5 @@ cprojl(long double complex z)
if (!isinf(creall(z)) && !isinf(cimagl(z)))
return (z);
else
- return (cpackl(INFINITY, copysignl(0.0, cimagl(z))));
+ return (CMPLXL(INFINITY, copysignl(0.0, cimagl(z))));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_csinh.c b/libm/upstream-freebsd/lib/msun/src/s_csinh.c
index c192f30..cff1402 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_csinh.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_csinh.c
@@ -32,10 +32,12 @@
*
* Exceptional values are noted in the comments within the source code.
* These values and the return value were taken from n1124.pdf.
+ * The sign of the result for some exceptional values is unspecified but
+ * must satisfy both sinh(conj(z)) == conj(sinh(z)) and sinh(-z) == -sinh(z).
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_csinh.c 284426 2015-06-15 20:16:53Z tijl $");
#include <complex.h>
#include <math.h>
@@ -62,48 +64,45 @@ csinh(double complex z)
/* Handle the nearly-non-exceptional cases where x and y are finite. */
if (ix < 0x7ff00000 && iy < 0x7ff00000) {
if ((iy | ly) == 0)
- return (cpack(sinh(x), y));
- if (ix < 0x40360000) /* small x: normal case */
- return (cpack(sinh(x) * cos(y), cosh(x) * sin(y)));
+ return (CMPLX(sinh(x), y));
+ if (ix < 0x40360000) /* |x| < 22: normal case */
+ return (CMPLX(sinh(x) * cos(y), cosh(x) * sin(y)));
/* |x| >= 22, so cosh(x) ~= exp(|x|) */
if (ix < 0x40862e42) {
/* x < 710: exp(|x|) won't overflow */
h = exp(fabs(x)) * 0.5;
- return (cpack(copysign(h, x) * cos(y), h * sin(y)));
+ return (CMPLX(copysign(h, x) * cos(y), h * sin(y)));
} else if (ix < 0x4096bbaa) {
/* x < 1455: scale to avoid overflow */
- z = __ldexp_cexp(cpack(fabs(x), y), -1);
- return (cpack(creal(z) * copysign(1, x), cimag(z)));
+ z = __ldexp_cexp(CMPLX(fabs(x), y), -1);
+ return (CMPLX(creal(z) * copysign(1, x), cimag(z)));
} else {
/* x >= 1455: the result always overflows */
h = huge * x;
- return (cpack(h * cos(y), h * h * sin(y)));
+ return (CMPLX(h * cos(y), h * h * sin(y)));
}
}
/*
- * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
- * The sign of 0 in the result is unspecified. Choice = normally
- * the same as dNaN. Raise the invalid floating-point exception.
+ * sinh(+-0 +- I Inf) = +-0 + I dNaN.
+ * The sign of 0 in the result is unspecified. Choice = same sign
+ * as the argument. Raise the invalid floating-point exception.
*
- * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
- * The sign of 0 in the result is unspecified. Choice = normally
- * the same as d(NaN).
+ * sinh(+-0 +- I NaN) = +-0 + I d(NaN).
+ * The sign of 0 in the result is unspecified. Choice = same sign
+ * as the argument.
*/
- if ((ix | lx) == 0 && iy >= 0x7ff00000)
- return (cpack(copysign(0, x * (y - y)), y - y));
+ if ((ix | lx) == 0) /* && iy >= 0x7ff00000 */
+ return (CMPLX(x, y - y));
/*
* sinh(+-Inf +- I 0) = +-Inf + I +-0.
*
* sinh(NaN +- I 0) = d(NaN) + I +-0.
*/
- if ((iy | ly) == 0 && ix >= 0x7ff00000) {
- if (((hx & 0xfffff) | lx) == 0)
- return (cpack(x, y));
- return (cpack(x, copysign(0, y)));
- }
+ if ((iy | ly) == 0) /* && ix >= 0x7ff00000 */
+ return (CMPLX(x + x, y));
/*
* sinh(x +- I Inf) = dNaN + I dNaN.
@@ -113,45 +112,45 @@ csinh(double complex z)
* Optionally raises the invalid floating-point exception for finite
* nonzero x. Choice = don't raise (except for signaling NaNs).
*/
- if (ix < 0x7ff00000 && iy >= 0x7ff00000)
- return (cpack(y - y, x * (y - y)));
+ if (ix < 0x7ff00000) /* && iy >= 0x7ff00000 */
+ return (CMPLX(y - y, y - y));
/*
* sinh(+-Inf + I NaN) = +-Inf + I d(NaN).
- * The sign of Inf in the result is unspecified. Choice = normally
- * the same as d(NaN).
+ * The sign of Inf in the result is unspecified. Choice = same sign
+ * as the argument.
*
- * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
- * The sign of Inf in the result is unspecified. Choice = always +.
- * Raise the invalid floating-point exception.
+ * sinh(+-Inf +- I Inf) = +-Inf + I dNaN.
+ * The sign of Inf in the result is unspecified. Choice = same sign
+ * as the argument. Raise the invalid floating-point exception.
*
* sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y)
*/
- if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+ if (ix == 0x7ff00000 && lx == 0) {
if (iy >= 0x7ff00000)
- return (cpack(x * x, x * (y - y)));
- return (cpack(x * cos(y), INFINITY * sin(y)));
+ return (CMPLX(x, y - y));
+ return (CMPLX(x * cos(y), INFINITY * sin(y)));
}
/*
- * sinh(NaN + I NaN) = d(NaN) + I d(NaN).
+ * sinh(NaN1 + I NaN2) = d(NaN1, NaN2) + I d(NaN1, NaN2).
*
- * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
+ * sinh(NaN +- I Inf) = d(NaN, dNaN) + I d(NaN, dNaN).
* Optionally raises the invalid floating-point exception.
* Choice = raise.
*
- * sinh(NaN + I y) = d(NaN) + I d(NaN).
+ * sinh(NaN + I y) = d(NaN) + I d(NaN).
* Optionally raises the invalid floating-point exception for finite
* nonzero y. Choice = don't raise (except for signaling NaNs).
*/
- return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+ return (CMPLX((x + x) * (y - y), (x * x) * (y - y)));
}
double complex
csin(double complex z)
{
- /* csin(z) = -I * csinh(I * z) */
- z = csinh(cpack(-cimag(z), creal(z)));
- return (cpack(cimag(z), -creal(z)));
+ /* csin(z) = -I * csinh(I * z) = I * conj(csinh(I * conj(z))). */
+ z = csinh(CMPLX(cimag(z), creal(z)));
+ return (CMPLX(cimag(z), creal(z)));
}
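A quick check of the identity used by the new csin() (a sketch, not part of the patch): for z = x + I y the code evaluates csinh(y + I x) and swaps the parts of the result, which is exactly I * conj(csinh(I * conj(z))):

    csinh(y + I x) = sinh(y) cos(x) + I cosh(y) sin(x)

and swapping the real and imaginary parts gives

    cosh(y) sin(x) + I sinh(y) cos(x) = csin(x + I y)

Because no part is negated along the way, the signs of zeros and NaNs produced by the csinh special cases above reach the final value unchanged.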
diff --git a/libm/upstream-freebsd/lib/msun/src/s_csinhf.c b/libm/upstream-freebsd/lib/msun/src/s_csinhf.c
index c523125..f050890 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_csinhf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_csinhf.c
@@ -25,11 +25,11 @@
*/
/*
- * Hyperbolic sine of a complex argument z. See s_csinh.c for details.
+ * Float version of csinh(). See s_csinh.c for details.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_csinhf.c 284426 2015-06-15 20:16:53Z tijl $");
#include <complex.h>
#include <math.h>
@@ -55,51 +55,48 @@ csinhf(float complex z)
if (ix < 0x7f800000 && iy < 0x7f800000) {
if (iy == 0)
- return (cpackf(sinhf(x), y));
- if (ix < 0x41100000) /* small x: normal case */
- return (cpackf(sinhf(x) * cosf(y), coshf(x) * sinf(y)));
+ return (CMPLXF(sinhf(x), y));
+ if (ix < 0x41100000) /* |x| < 9: normal case */
+ return (CMPLXF(sinhf(x) * cosf(y), coshf(x) * sinf(y)));
/* |x| >= 9, so cosh(x) ~= exp(|x|) */
if (ix < 0x42b17218) {
/* x < 88.7: expf(|x|) won't overflow */
- h = expf(fabsf(x)) * 0.5f;
- return (cpackf(copysignf(h, x) * cosf(y), h * sinf(y)));
+ h = expf(fabsf(x)) * 0.5F;
+ return (CMPLXF(copysignf(h, x) * cosf(y), h * sinf(y)));
} else if (ix < 0x4340b1e7) {
/* x < 192.7: scale to avoid overflow */
- z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
- return (cpackf(crealf(z) * copysignf(1, x), cimagf(z)));
+ z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1);
+ return (CMPLXF(crealf(z) * copysignf(1, x), cimagf(z)));
} else {
/* x >= 192.7: the result always overflows */
h = huge * x;
- return (cpackf(h * cosf(y), h * h * sinf(y)));
+ return (CMPLXF(h * cosf(y), h * h * sinf(y)));
}
}
- if (ix == 0 && iy >= 0x7f800000)
- return (cpackf(copysignf(0, x * (y - y)), y - y));
+ if (ix == 0) /* && iy >= 0x7f800000 */
+ return (CMPLXF(x, y - y));
- if (iy == 0 && ix >= 0x7f800000) {
- if ((hx & 0x7fffff) == 0)
- return (cpackf(x, y));
- return (cpackf(x, copysignf(0, y)));
- }
+ if (iy == 0) /* && ix >= 0x7f800000 */
+ return (CMPLXF(x + x, y));
- if (ix < 0x7f800000 && iy >= 0x7f800000)
- return (cpackf(y - y, x * (y - y)));
+ if (ix < 0x7f800000) /* && iy >= 0x7f800000 */
+ return (CMPLXF(y - y, y - y));
- if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {
+ if (ix == 0x7f800000) {
if (iy >= 0x7f800000)
- return (cpackf(x * x, x * (y - y)));
- return (cpackf(x * cosf(y), INFINITY * sinf(y)));
+ return (CMPLXF(x, y - y));
+ return (CMPLXF(x * cosf(y), INFINITY * sinf(y)));
}
- return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+ return (CMPLXF((x + x) * (y - y), (x * x) * (y - y)));
}
float complex
csinf(float complex z)
{
- z = csinhf(cpackf(-cimagf(z), crealf(z)));
- return (cpackf(cimagf(z), -crealf(z)));
+ z = csinhf(CMPLXF(cimagf(z), crealf(z)));
+ return (CMPLXF(cimagf(z), crealf(z)));
}
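The cpack()/cpackf() helpers being removed assembled a complex value from its two parts; C11's CMPLX()/CMPLXF()/CMPLXL() macros do the same, but are standard and are guaranteed not to launder special values through arithmetic. A minimal standalone sketch (not from the patch) showing that a negative-zero imaginary part survives construction:

    #include <complex.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        /* CMPLX() builds the value directly from its parts, so the sign
           of a zero imaginary part is preserved exactly. */
        double complex z = CMPLX(0.0, -0.0);
        printf("%d\n", signbit(cimag(z)) != 0);  /* 1: -0.0 kept its sign */
        return 0;
    }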
diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrt.c b/libm/upstream-freebsd/lib/msun/src/s_csqrt.c
index 18a7ae3..c908a2d 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_csqrt.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_csqrt.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrt.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <float.h>
@@ -58,12 +58,12 @@ csqrt(double complex z)
/* Handle special cases. */
if (z == 0)
- return (cpack(0, b));
+ return (CMPLX(0, b));
if (isinf(b))
- return (cpack(INFINITY, b));
+ return (CMPLX(INFINITY, b));
if (isnan(a)) {
t = (b - b) / (b - b); /* raise invalid if b is not a NaN */
- return (cpack(a, t)); /* return NaN + NaN i */
+ return (CMPLX(a, t)); /* return NaN + NaN i */
}
if (isinf(a)) {
/*
@@ -73,9 +73,9 @@ csqrt(double complex z)
* csqrt(-inf + y i) = 0 + inf i
*/
if (signbit(a))
- return (cpack(fabs(b - b), copysign(a, b)));
+ return (CMPLX(fabs(b - b), copysign(a, b)));
else
- return (cpack(a, copysign(b - b, b)));
+ return (CMPLX(a, copysign(b - b, b)));
}
/*
* The remaining special case (b is NaN) is handled just fine by
@@ -94,10 +94,10 @@ csqrt(double complex z)
/* Algorithm 312, CACM vol 10, Oct 1967. */
if (a >= 0) {
t = sqrt((a + hypot(a, b)) * 0.5);
- result = cpack(t, b / (2 * t));
+ result = CMPLX(t, b / (2 * t));
} else {
t = sqrt((-a + hypot(a, b)) * 0.5);
- result = cpack(fabs(b) / (2 * t), copysign(t, b));
+ result = CMPLX(fabs(b) / (2 * t), copysign(t, b));
}
/* Rescale. */
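For the a >= 0 branch of Algorithm 312, a quick algebraic check (a sketch): with r = hypot(a, b) and t = sqrt((a + r) * 0.5), using b*b = r*r - a*a,

    (t + I b/(2t))^2 = t^2 - b^2/(4 t^2) + I b
                     = (a + r)/2 - (r - a)/2 + I b
                     = a + I b

so the returned value squares back to z exactly, up to rounding.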
diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c b/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c
index da7fe18..12a894f 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrtf.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <math.h>
@@ -49,12 +49,12 @@ csqrtf(float complex z)
/* Handle special cases. */
if (z == 0)
- return (cpackf(0, b));
+ return (CMPLXF(0, b));
if (isinf(b))
- return (cpackf(INFINITY, b));
+ return (CMPLXF(INFINITY, b));
if (isnan(a)) {
t = (b - b) / (b - b); /* raise invalid if b is not a NaN */
- return (cpackf(a, t)); /* return NaN + NaN i */
+ return (CMPLXF(a, t)); /* return NaN + NaN i */
}
if (isinf(a)) {
/*
@@ -64,9 +64,9 @@ csqrtf(float complex z)
* csqrtf(-inf + y i) = 0 + inf i
*/
if (signbit(a))
- return (cpackf(fabsf(b - b), copysignf(a, b)));
+ return (CMPLXF(fabsf(b - b), copysignf(a, b)));
else
- return (cpackf(a, copysignf(b - b, b)));
+ return (CMPLXF(a, copysignf(b - b, b)));
}
/*
* The remaining special case (b is NaN) is handled just fine by
@@ -80,9 +80,9 @@ csqrtf(float complex z)
*/
if (a >= 0) {
t = sqrt((a + hypot(a, b)) * 0.5);
- return (cpackf(t, b / (2.0 * t)));
+ return (CMPLXF(t, b / (2.0 * t)));
} else {
t = sqrt((-a + hypot(a, b)) * 0.5);
- return (cpackf(fabsf(b) / (2.0 * t), copysignf(t, b)));
+ return (CMPLXF(fabsf(b) / (2.0 * t), copysignf(t, b)));
}
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c b/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c
index dd18e1e..7bcff59 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrtl.c 275819 2014-12-16 09:21:56Z ed $");
#include <complex.h>
#include <float.h>
@@ -58,12 +58,12 @@ csqrtl(long double complex z)
/* Handle special cases. */
if (z == 0)
- return (cpackl(0, b));
+ return (CMPLXL(0, b));
if (isinf(b))
- return (cpackl(INFINITY, b));
+ return (CMPLXL(INFINITY, b));
if (isnan(a)) {
t = (b - b) / (b - b); /* raise invalid if b is not a NaN */
- return (cpackl(a, t)); /* return NaN + NaN i */
+ return (CMPLXL(a, t)); /* return NaN + NaN i */
}
if (isinf(a)) {
/*
@@ -73,9 +73,9 @@ csqrtl(long double complex z)
* csqrt(-inf + y i) = 0 + inf i
*/
if (signbit(a))
- return (cpackl(fabsl(b - b), copysignl(a, b)));
+ return (CMPLXL(fabsl(b - b), copysignl(a, b)));
else
- return (cpackl(a, copysignl(b - b, b)));
+ return (CMPLXL(a, copysignl(b - b, b)));
}
/*
* The remaining special case (b is NaN) is handled just fine by
@@ -94,10 +94,10 @@ csqrtl(long double complex z)
/* Algorithm 312, CACM vol 10, Oct 1967. */
if (a >= 0) {
t = sqrtl((a + hypotl(a, b)) * 0.5);
- result = cpackl(t, b / (2 * t));
+ result = CMPLXL(t, b / (2 * t));
} else {
t = sqrtl((-a + hypotl(a, b)) * 0.5);
- result = cpackl(fabsl(b) / (2 * t), copysignl(t, b));
+ result = CMPLXL(fabsl(b) / (2 * t), copysignl(t, b));
}
/* Rescale. */
diff --git a/libm/upstream-freebsd/lib/msun/src/s_ctanh.c b/libm/upstream-freebsd/lib/msun/src/s_ctanh.c
index d427e28..e5973c3 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_ctanh.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_ctanh.c
@@ -25,7 +25,7 @@
*/
/*
- * Hyperbolic tangent of a complex argument z = x + i y.
+ * Hyperbolic tangent of a complex argument z = x + I y.
*
* The algorithm is from:
*
@@ -44,15 +44,15 @@
*
* tanh(z) = sinh(z) / cosh(z)
*
- * sinh(x) cos(y) + i cosh(x) sin(y)
+ * sinh(x) cos(y) + I cosh(x) sin(y)
* = ---------------------------------
- * cosh(x) cos(y) + i sinh(x) sin(y)
+ * cosh(x) cos(y) + I sinh(x) sin(y)
*
- * cosh(x) sinh(x) / cos^2(y) + i tan(y)
+ * cosh(x) sinh(x) / cos^2(y) + I tan(y)
* = -------------------------------------
* 1 + sinh^2(x) / cos^2(y)
*
- * beta rho s + i t
+ * beta rho s + I t
* = ----------------
* 1 + beta s^2
*
@@ -64,7 +64,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_ctanh.c 284427 2015-06-15 20:40:44Z tijl $");
#include <complex.h>
#include <math.h>
@@ -85,16 +85,16 @@ ctanh(double complex z)
ix = hx & 0x7fffffff;
/*
- * ctanh(NaN + i 0) = NaN + i 0
+ * ctanh(NaN +- I 0) = d(NaN) +- I 0
*
- * ctanh(NaN + i y) = NaN + i NaN for y != 0
+ * ctanh(NaN + I y) = d(NaN,y) + I d(NaN,y) for y != 0
*
* The imaginary part has the sign of x*sin(2*y), but there's no
* special effort to get this right.
*
- * ctanh(+-Inf +- i Inf) = +-1 +- 0
+ * ctanh(+-Inf +- I Inf) = +-1 +- I 0
*
- * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite
+ * ctanh(+-Inf + I y) = +-1 + I 0 sin(2y) for y finite
*
* The imaginary part of the sign is unspecified. This special
* case is only needed to avoid a spurious invalid exception when
@@ -102,26 +102,27 @@ ctanh(double complex z)
*/
if (ix >= 0x7ff00000) {
if ((ix & 0xfffff) | lx) /* x is NaN */
- return (cpack(x, (y == 0 ? y : x * y)));
+ return (CMPLX((x + 0) * (y + 0),
+ y == 0 ? y : (x + 0) * (y + 0)));
SET_HIGH_WORD(x, hx - 0x40000000); /* x = copysign(1, x) */
- return (cpack(x, copysign(0, isinf(y) ? y : sin(y) * cos(y))));
+ return (CMPLX(x, copysign(0, isinf(y) ? y : sin(y) * cos(y))));
}
/*
- * ctanh(x + i NAN) = NaN + i NaN
- * ctanh(x +- i Inf) = NaN + i NaN
+ * ctanh(x + I NaN) = d(NaN) + I d(NaN)
+ * ctanh(x +- I Inf) = dNaN + I dNaN
*/
if (!isfinite(y))
- return (cpack(y - y, y - y));
+ return (CMPLX(y - y, y - y));
/*
- * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the
+ * ctanh(+-huge +- I y) ~= +-1 +- I 2sin(2y)/exp(2x), using the
* approximation sinh^2(huge) ~= exp(2*huge) / 4.
* We use a modified formula to avoid spurious overflow.
*/
- if (ix >= 0x40360000) { /* x >= 22 */
+ if (ix >= 0x40360000) { /* |x| >= 22 */
double exp_mx = exp(-fabs(x));
- return (cpack(copysign(1, x),
+ return (CMPLX(copysign(1, x),
4 * sin(y) * cos(y) * exp_mx * exp_mx));
}
@@ -131,14 +132,14 @@ ctanh(double complex z)
s = sinh(x);
rho = sqrt(1 + s * s); /* = cosh(x) */
denom = 1 + beta * s * s;
- return (cpack((beta * rho * s) / denom, t / denom));
+ return (CMPLX((beta * rho * s) / denom, t / denom));
}
double complex
ctan(double complex z)
{
- /* ctan(z) = -I * ctanh(I * z) */
- z = ctanh(cpack(-cimag(z), creal(z)));
- return (cpack(cimag(z), -creal(z)));
+ /* ctan(z) = -I * ctanh(I * z) = I * conj(ctanh(I * conj(z))) */
+ z = ctanh(CMPLX(cimag(z), creal(z)));
+ return (CMPLX(cimag(z), creal(z)));
}
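For reference, the variables in the returned expression are defined in the unchanged part of the file (upstream s_ctanh.c): t = tan(y), beta = 1 + t*t = 1/cos^2(y), s = sinh(x), and rho = sqrt(1 + s*s) = cosh(x), so the value computed is (beta*rho*s + I t) / (1 + beta*s*s), the final form derived in the header comment above.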
diff --git a/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c b/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c
index 4be28d8..e9826c0 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_ctanhf.c 284428 2015-06-15 20:47:26Z tijl $");
#include <complex.h>
#include <math.h>
@@ -51,18 +51,19 @@ ctanhf(float complex z)
if (ix >= 0x7f800000) {
if (ix & 0x7fffff)
- return (cpackf(x, (y == 0 ? y : x * y)));
+ return (CMPLXF((x + 0) * (y + 0),
+ y == 0 ? y : (x + 0) * (y + 0)));
SET_FLOAT_WORD(x, hx - 0x40000000);
- return (cpackf(x,
+ return (CMPLXF(x,
copysignf(0, isinf(y) ? y : sinf(y) * cosf(y))));
}
if (!isfinite(y))
- return (cpackf(y - y, y - y));
+ return (CMPLXF(y - y, y - y));
- if (ix >= 0x41300000) { /* x >= 11 */
+ if (ix >= 0x41300000) { /* |x| >= 11 */
float exp_mx = expf(-fabsf(x));
- return (cpackf(copysignf(1, x),
+ return (CMPLXF(copysignf(1, x),
4 * sinf(y) * cosf(y) * exp_mx * exp_mx));
}
@@ -71,14 +72,14 @@ ctanhf(float complex z)
s = sinhf(x);
rho = sqrtf(1 + s * s);
denom = 1 + beta * s * s;
- return (cpackf((beta * rho * s) / denom, t / denom));
+ return (CMPLXF((beta * rho * s) / denom, t / denom));
}
float complex
ctanf(float complex z)
{
- z = ctanhf(cpackf(-cimagf(z), crealf(z)));
- return (cpackf(cimagf(z), -crealf(z)));
+ z = ctanhf(CMPLXF(cimagf(z), crealf(z)));
+ return (CMPLXF(cimagf(z), crealf(z)));
}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_exp2.c b/libm/upstream-freebsd/lib/msun/src/s_exp2.c
index fde11c2..dbef729 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_exp2.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_exp2.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_exp2.c 286515 2015-08-09 10:00:13Z dim $");
#include <float.h>
@@ -376,14 +376,14 @@ exp2(double x)
/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
t = tbl[i0]; /* exp2t[i0] */
z -= tbl[i0 + 1]; /* eps[i0] */
- if (k >= -1021 << 20)
+ if (k >= -(1021 << 20))
INSERT_WORDS(twopk, 0x3ff00000 + k, 0);
else
INSERT_WORDS(twopkp1000, 0x3ff00000 + k + (1000 << 20), 0);
r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
/* Scale by 2**(k>>20). */
- if(k >= -1021 << 20) {
+ if(k >= -(1021 << 20)) {
if (k == 1024 << 20)
return (r * 2.0 * 0x1p1023);
return (r * twopk);
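The exp2() change is subtle: unary minus binds tighter than <<, so -1021 << 20 parses as (-1021) << 20, a left shift of a negative value, which is undefined behavior in C. The parenthesized form shifts the positive constant and negates the well-defined result. A standalone sketch of the distinction:

    #include <stdio.h>

    int main(void) {
        /* Both yield the same bits on two's-complement targets, but the
           first left-shifts a negative operand (undefined behavior);
           the second shifts a positive value and negates the result. */
        int ub_form = -1021 << 20;     /* parsed as (-1021) << 20 */
        int defined = -(1021 << 20);
        printf("%d %d\n", ub_form, defined);
        return 0;
    }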
diff --git a/libm/upstream-freebsd/lib/msun/src/s_fabs.c b/libm/upstream-freebsd/lib/msun/src/s_fabs.c
deleted file mode 100644
index 15529e5..0000000
--- a/libm/upstream-freebsd/lib/msun/src/s_fabs.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* @(#)s_fabs.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#ifndef lint
-static char rcsid[] = "$FreeBSD$";
-#endif
-
-/*
- * fabs(x) returns the absolute value of x.
- */
-
-#include "math.h"
-#include "math_private.h"
-
-double
-fabs(double x)
-{
- u_int32_t high;
- GET_HIGH_WORD(high,x);
- SET_HIGH_WORD(x,high&0x7fffffff);
- return x;
-}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_fabsf.c b/libm/upstream-freebsd/lib/msun/src/s_fabsf.c
deleted file mode 100644
index e9383d0..0000000
--- a/libm/upstream-freebsd/lib/msun/src/s_fabsf.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* s_fabsf.c -- float version of s_fabs.c.
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * fabsf(x) returns the absolute value of x.
- */
-
-#include "math.h"
-#include "math_private.h"
-
-float
-fabsf(float x)
-{
- u_int32_t ix;
- GET_FLOAT_WORD(ix,x);
- SET_FLOAT_WORD(x,ix&0x7fffffff);
- return x;
-}
diff --git a/libm/upstream-freebsd/lib/msun/src/s_scalbln.c b/libm/upstream-freebsd/lib/msun/src/s_scalbln.c
index d609d4e..8e61377 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_scalbln.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_scalbln.c
@@ -25,52 +25,30 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/lib/msun/src/s_scalbln.c 278339 2015-02-07 00:38:18Z kargl $");
-#include <limits.h>
#include <math.h>
+#define NMAX 65536
+#define NMIN -65536
+
double
-scalbln (double x, long n)
+scalbln(double x, long n)
{
- int in;
- in = (int)n;
- if (in != n) {
- if (n > 0)
- in = INT_MAX;
- else
- in = INT_MIN;
- }
- return (scalbn(x, in));
+ return (scalbn(x, (n > NMAX) ? NMAX : (n < NMIN) ? NMIN : (int)n));
}
float
-scalblnf (float x, long n)
+scalblnf(float x, long n)
{
- int in;
- in = (int)n;
- if (in != n) {
- if (n > 0)
- in = INT_MAX;
- else
- in = INT_MIN;
- }
- return (scalbnf(x, in));
+ return (scalbnf(x, (n > NMAX) ? NMAX : (n < NMIN) ? NMIN : (int)n));
}
long double
-scalblnl (long double x, long n)
+scalblnl(long double x, long n)
{
- int in;
- in = (int)n;
- if (in != n) {
- if (n > 0)
- in = INT_MAX;
- else
- in = INT_MIN;
- }
- return (scalbnl(x, (int)n));
+ return (scalbnl(x, (n > NMAX) ? NMAX : (n < NMIN) ? NMIN : (int)n));
}
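The rewrite clamps n to [NMIN, NMAX] instead of saturating at INT_MIN/INT_MAX. An exponent of 65536 is already far beyond the dynamic range of every supported format (even 80-bit long double bottoms out near -16445), so scalbn() overflows or underflows identically either way. A small sketch, assuming IEEE-754 double:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        /* 2^3000 already overflows double, so clamping larger arguments
           to 65536 cannot change the observable result. */
        printf("%g\n", scalbln(1.0, 3000L));    /* inf */
        printf("%g\n", scalbln(1.0, 100000L));  /* inf, via the clamp */
        printf("%g\n", scalbln(1.0, -3000L));   /* 0 */
        return 0;
    }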
diff --git a/linker/linker.cpp b/linker/linker.cpp
index d3ac1d0..bc40cf1 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -890,6 +890,67 @@ typedef linked_list_t<soinfo> SoinfoLinkedList;
typedef linked_list_t<const char> StringLinkedList;
typedef linked_list_t<LoadTask> LoadTaskList;
+static soinfo* find_library(const char* name, int rtld_flags, const android_dlextinfo* extinfo);
+
+// g_ld_all_shim_libs maintains the references to memory as it is used
+// in the soinfo structures and in the g_active_shim_libs list.
+
+static std::vector<std::string> g_ld_all_shim_libs;
+
+// g_active_shim_libs are all shim libs that are still eligible
+// to be loaded. We must remove a shim lib from the list before
+// we load the library to avoid recursive loops (load shim libA
+// for libB where libA also links against libB).
+
+static linked_list_t<const std::string> g_active_shim_libs;
+
+static void reset_g_active_shim_libs(void) {
+ g_active_shim_libs.clear();
+ for (const auto& pair : g_ld_all_shim_libs) {
+ g_active_shim_libs.push_back(&pair);
+ }
+}
+
+static void parse_LD_SHIM_LIBS(const char* path) {
+ parse_path(path, " :", &g_ld_all_shim_libs);
+ reset_g_active_shim_libs();
+}
+
+static bool shim_lib_matches(const char *shim_lib, const char *realpath) {
+ const char *sep = strchr(shim_lib, '|');
+ return sep != nullptr && strncmp(realpath, shim_lib, sep - shim_lib) == 0;
+}
+
+template<typename F>
+static void shim_libs_for_each(const char *const path, F action) {
+ if (path == nullptr) return;
+ INFO("Finding shim libs for \"%s\"\n", path);
+ std::vector<const std::string *> matched;
+
+ g_active_shim_libs.for_each([&](const std::string *a_pair) {
+ const char *pair = a_pair->c_str();
+ if (shim_lib_matches(pair, path)) {
+ matched.push_back(a_pair);
+ }
+ });
+
+ g_active_shim_libs.remove_if([&](const std::string *a_pair) {
+ const char *pair = a_pair->c_str();
+ return shim_lib_matches(pair, path);
+ });
+
+ for (const auto& one_pair : matched) {
+ const char* const pair = one_pair->c_str();
+ const char* sep = strchr(pair, '|');
+ soinfo *child = find_library(sep+1, RTLD_GLOBAL, nullptr);
+ if (child) {
+ INFO("Using shim lib \"%s\"\n", sep+1);
+ action(child);
+ } else {
+ PRINT("Shim lib \"%s\" can not be loaded, ignoring.", sep+1);
+ }
+ }
+}
// This function walks down the tree of soinfo dependencies
// in breadth-first order and
@@ -899,7 +960,7 @@ typedef linked_list_t<LoadTask> LoadTaskList;
// walk_dependencies_tree returns false if walk was terminated
// by the action and true otherwise.
template<typename F>
-static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_size, F action) {
+static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_size, bool do_shims, F action) {
SoinfoLinkedList visit_list;
SoinfoLinkedList visited;
@@ -919,6 +980,13 @@ static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_s
visited.push_back(si);
+ if (do_shims) {
+ shim_libs_for_each(si->get_realpath(), [&](soinfo* child) {
+ si->add_child(child);
+ visit_list.push_back(child);
+ });
+ }
+
si->get_children().for_each([&](soinfo* child) {
visit_list.push_back(child);
});
@@ -933,7 +1001,7 @@ static const ElfW(Sym)* dlsym_handle_lookup(soinfo* root, soinfo* skip_until,
const ElfW(Sym)* result = nullptr;
bool skip_lookup = skip_until != nullptr;
- walk_dependencies_tree(&root, 1, [&](soinfo* current_soinfo) {
+ walk_dependencies_tree(&root, 1, false, [&](soinfo* current_soinfo) {
if (skip_lookup) {
skip_lookup = current_soinfo != skip_until;
return true;
@@ -1513,6 +1581,7 @@ static bool find_libraries(soinfo* start_with, const char* const library_names[]
walk_dependencies_tree(
start_with == nullptr ? soinfos : &start_with,
start_with == nullptr ? soinfos_count : 1,
+ true,
[&] (soinfo* si) {
local_group.push_back(si);
return true;
@@ -1692,6 +1761,7 @@ soinfo* do_dlopen(const char* name, int flags, const android_dlextinfo* extinfo)
}
ProtectedDataGuard guard;
+ reset_g_active_shim_libs();
soinfo* si = find_library(name, flags, extinfo);
if (si != nullptr) {
si->call_constructors();
@@ -2943,12 +3013,14 @@ bool soinfo::link_image(const soinfo_list_t& global_group, const soinfo_list_t&
if (has_text_relocations) {
// Fail if app is targeting sdk version > 22
// TODO (dimitry): remove != __ANDROID_API__ check once http://b/20020312 is fixed
+#if !defined(__i386__) // ffmpeg requires text relocations on x86
if (get_application_target_sdk_version() != __ANDROID_API__
&& get_application_target_sdk_version() > 22) {
PRINT("%s: has text relocations", get_realpath());
DL_ERR("%s: has text relocations", get_realpath());
return false;
}
+#endif
// Make segments writable to allow text relocations to work properly. We will later call
// phdr_table_protect_segments() after all of them are applied and all constructors are run.
DL_WARN("%s has text relocations. This is wasting memory and prevents "
@@ -3164,9 +3236,11 @@ static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(
// doesn't cost us anything.
const char* ldpath_env = nullptr;
const char* ldpreload_env = nullptr;
+ const char* ldshim_libs_env = nullptr;
if (!getauxval(AT_SECURE)) {
ldpath_env = getenv("LD_LIBRARY_PATH");
ldpreload_env = getenv("LD_PRELOAD");
+ ldshim_libs_env = getenv("LD_SHIM_LIBS");
}
INFO("[ android linker & debugger ]");
@@ -3220,6 +3294,7 @@ static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(
// Use LD_LIBRARY_PATH and LD_PRELOAD (but only if we aren't setuid/setgid).
parse_LD_LIBRARY_PATH(ldpath_env);
parse_LD_PRELOAD(ldpreload_env);
+ parse_LD_SHIM_LIBS(ldshim_libs_env);
somain = si;
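Taken together, the linker hunks implement an LD_SHIM_LIBS environment variable: entries separated by spaces or colons (the " :" separator passed to parse_path), each consisting of the realpath of a target library, a '|', and the shim library to load alongside it. A hypothetical example (library names invented for illustration):

    LD_SHIM_LIBS="/system/lib/libfoo.so|libshim_foo.so:/system/lib/libbar.so|libshim_bar.so"

When the dependency walk visits /system/lib/libfoo.so, the linker loads libshim_foo.so as an extra child; the entry is removed from g_active_shim_libs before the load, so a shim that links back against its target cannot recurse. Like LD_PRELOAD, the variable is ignored for AT_SECURE processes.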
diff --git a/tests/Android.mk b/tests/Android.mk
index dc2e410..b994cc3 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -64,6 +64,7 @@ libBionicStandardTests_src_files := \
getcwd_test.cpp \
inttypes_test.cpp \
libc_logging_test.cpp \
+ libgen_basename_test.cpp \
libgen_test.cpp \
locale_test.cpp \
malloc_test.cpp \
diff --git a/tests/buffer_tests.cpp b/tests/buffer_tests.cpp
index 4967382..a2b330e 100644
--- a/tests/buffer_tests.cpp
+++ b/tests/buffer_tests.cpp
@@ -381,15 +381,19 @@ void RunSrcDstBufferOverreadTest(void (*test_func)(uint8_t*, uint8_t*, size_t))
// Make the second page unreadable and unwritable.
ASSERT_TRUE(mprotect(&memory[pagesize], pagesize, PROT_NONE) == 0);
- uint8_t* dst = new uint8_t[pagesize];
- for (size_t i = 0; i < pagesize; i++) {
- uint8_t* src = &memory[pagesize-i];
-
- test_func(src, dst, i);
+ uint8_t* dst_buffer = new uint8_t[2*pagesize];
+ // Change the dst alignment as we change the source.
+ for (size_t i = 0; i < 16; i++) {
+ uint8_t* dst = &dst_buffer[i];
+ for (size_t j = 0; j < pagesize; j++) {
+ uint8_t* src = &memory[pagesize-j];
+
+ test_func(src, dst, j);
+ }
}
ASSERT_TRUE(mprotect(&memory[pagesize], pagesize, PROT_READ | PROT_WRITE) == 0);
free(memory);
- delete dst;
+ delete[] dst_buffer;
}
void RunCmpBufferOverreadTest(
diff --git a/tests/libgen_basename_test.cpp b/tests/libgen_basename_test.cpp
new file mode 100644
index 0000000..55939d1
--- /dev/null
+++ b/tests/libgen_basename_test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _GNU_SOURCE
+ #define _GNU_SOURCE 1
+#endif
+
+#include <string.h>
+
+#if defined(basename)
+ #error basename should not be defined at this point
+#endif
+
+static const char* gnu_basename(const char* in) {
+ return basename(in);
+}
+
+#include <libgen.h>
+
+#if !defined(basename)
+ #error basename should be defined at this point
+#endif
+
+static char* posix_basename(char* in) {
+ return basename(in);
+}
+
+#include <errno.h>
+#include <gtest/gtest.h>
+
+static void __TestGnuBasename(const char* in, const char* expected_out, int line) {
+ errno = 0;
+ const char* out = gnu_basename(in);
+ ASSERT_STREQ(expected_out, out) << "(" << line << "): " << in << std::endl;
+ ASSERT_EQ(0, errno) << "(" << line << "): " << in << std::endl;
+}
+
+static void __TestPosixBasename(const char* in, const char* expected_out, int line) {
+ char* writable_in = (in != NULL) ? strdup(in) : NULL;
+ errno = 0;
+ const char* out = posix_basename(&writable_in[0]);
+ ASSERT_STREQ(expected_out, out) << "(" << line << "): " << in << std::endl;
+ ASSERT_EQ(0, errno) << "(" << line << "): " << in << std::endl;
+ free(writable_in);
+}
+
+#define TestGnuBasename(in, expected) __TestGnuBasename(in, expected, __LINE__)
+#define TestPosixBasename(in, expected) __TestPosixBasename(in, expected, __LINE__)
+
+TEST(libgen_basename, gnu_basename) {
+ // GNU's basename doesn't accept NULL
+ // TestGnuBasename(NULL, ".");
+ TestGnuBasename("", "");
+ TestGnuBasename("/usr/lib", "lib");
+ TestGnuBasename("/system/bin/sh/", "");
+ TestGnuBasename("/usr/", "");
+ TestGnuBasename("usr", "usr");
+ TestGnuBasename("/", "");
+ TestGnuBasename(".", ".");
+ TestGnuBasename("..", "..");
+ TestGnuBasename("///", "");
+ TestGnuBasename("//usr//lib//", "");
+}
+
+TEST(libgen_basename, posix_basename) {
+ TestPosixBasename(NULL, ".");
+ TestPosixBasename("", ".");
+ TestPosixBasename("/usr/lib", "lib");
+ TestPosixBasename("/system/bin/sh/", "sh");
+ TestPosixBasename("/usr/", "usr");
+ TestPosixBasename("usr", "usr");
+ TestPosixBasename("/", "/");
+ TestPosixBasename(".", ".");
+ TestPosixBasename("..", "..");
+ TestPosixBasename("///", "/");
+ TestPosixBasename("//usr//lib//", "lib");
+}
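The include order in this new test is the point of it: <string.h> under _GNU_SOURCE declares the GNU basename(), and <libgen.h> then #defines basename to the POSIX version, so each wrapper captures a different function. A minimal standalone sketch of the behavioral difference (not part of the test):

    #define _GNU_SOURCE
    #include <string.h>   /* GNU basename(): never modifies its argument */

    static const char* gnu(const char* path) { return basename(path); }

    #include <libgen.h>   /* #defines basename to the POSIX version */
    #include <stdio.h>

    int main(void) {
        char posix_in[] = "/usr/";  /* POSIX basename() may write to this */
        printf("GNU:   \"%s\"\n", gnu("/usr/"));        /* "" */
        printf("POSIX: \"%s\"\n", basename(posix_in));  /* "usr" */
        return 0;
    }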
diff --git a/tests/libgen_test.cpp b/tests/libgen_test.cpp
index e9a5d5c..8a37a3f 100644
--- a/tests/libgen_test.cpp
+++ b/tests/libgen_test.cpp
@@ -19,15 +19,6 @@
#include <errno.h>
#include <gtest/gtest.h>
-static void TestBasename(const char* in, const char* expected_out) {
- char* writable_in = (in != NULL) ? strdup(in) : NULL;
- errno = 0;
- const char* out = basename(&writable_in[0]);
- ASSERT_STREQ(expected_out, out) << in;
- ASSERT_EQ(0, errno) << in;
- free(writable_in);
-}
-
static void TestDirname(const char* in, const char* expected_out) {
char* writable_in = (in != NULL) ? strdup(in) : NULL;
errno = 0;
@@ -37,21 +28,6 @@ static void TestDirname(const char* in, const char* expected_out) {
free(writable_in);
}
-// Do not use basename as the test name, it's defined to another value in glibc
-// so leads to a differently named test on host versus target architectures.
-TEST(libgen, posix_basename) {
- TestBasename(NULL, ".");
- TestBasename("", ".");
- TestBasename("/usr/lib", "lib");
- TestBasename("/usr/", "usr");
- TestBasename("usr", "usr");
- TestBasename("/", "/");
- TestBasename(".", ".");
- TestBasename("..", "..");
- TestBasename("///", "/");
- TestBasename("//usr//lib//", "lib");
-}
-
TEST(libgen, dirname) {
TestDirname(NULL, ".");
TestDirname("", ".");
diff --git a/tests/regex_test.cpp b/tests/regex_test.cpp
index d026221..4a4409e 100644
--- a/tests/regex_test.cpp
+++ b/tests/regex_test.cpp
@@ -36,3 +36,13 @@ TEST(regex, smoke) {
regfree(&re);
}
+
+TEST(regex, match_offsets) {
+ regex_t re;
+ regmatch_t matches[1];
+ ASSERT_EQ(0, regcomp(&re, "b", 0));
+ ASSERT_EQ(0, regexec(&re, "abc", 1, matches, 0));
+ ASSERT_EQ(1, matches[0].rm_so);
+ ASSERT_EQ(2, matches[0].rm_eo);
+ regfree(&re);
+}
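The new test pins down the regmatch_t convention: rm_so is the byte offset of the first matched character and rm_eo is one past the last, so rm_eo - rm_so is the match length. A small usage sketch built on the same pattern (not part of the test):

    #include <regex.h>
    #include <stdio.h>

    int main(void) {
        regex_t re;
        regmatch_t m;
        const char* text = "abc";
        if (regcomp(&re, "b", 0) != 0) return 1;
        if (regexec(&re, text, 1, &m, 0) == 0) {
            /* rm_eo points one past the end, so the length is rm_eo - rm_so. */
            printf("%.*s\n", (int)(m.rm_eo - m.rm_so), text + m.rm_so);  /* "b" */
        }
        regfree(&re);
        return 0;
    }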
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 1d63c76..3d97d81 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -1166,7 +1166,7 @@ static size_t LargeSetIncrement(size_t len) {
return 1;
}
-#define STRCAT_DST_LEN 128
+#define STRCAT_DST_LEN 64
static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) {
if (len >= 1) {
@@ -1181,7 +1181,7 @@ static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) {
int value2 = 32 + (value + 2) % 96;
memset(cmp_buf, value2, sizeof(cmp_buf));
- for (size_t i = 1; i <= STRCAT_DST_LEN; i++) {
+ for (size_t i = 1; i <= STRCAT_DST_LEN;) {
memset(dst, value2, i-1);
memset(dst+i-1, 0, len-i);
src[len-i] = '\0';
@@ -1189,6 +1189,13 @@ static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) {
reinterpret_cast<char*>(src))));
ASSERT_TRUE(memcmp(dst, cmp_buf, i-1) == 0);
ASSERT_TRUE(memcmp(src, dst+i-1, len-i+1) == 0);
+ // This is an expensive loop, so don't step through every value;
+ // once i gets to a certain size, start doubling.
+ if (i < 16) {
+ i++;
+ } else {
+ i <<= 1;
+ }
}
} else {
dst[0] = '\0';
@@ -1221,7 +1228,7 @@ static void DoStrlcatTest(uint8_t* src, uint8_t* dst, size_t len) {
int value2 = 32 + (value + 2) % 96;
memset(cmp_buf, value2, sizeof(cmp_buf));
- for (size_t i = 1; i <= STRCAT_DST_LEN; i++) {
+ for (size_t i = 1; i <= STRCAT_DST_LEN;) {
memset(dst, value2, i-1);
memset(dst+i-1, 0, len-i);
src[len-i] = '\0';
@@ -1229,6 +1236,13 @@ static void DoStrlcatTest(uint8_t* src, uint8_t* dst, size_t len) {
reinterpret_cast<char*>(src), len));
ASSERT_TRUE(memcmp(dst, cmp_buf, i-1) == 0);
ASSERT_TRUE(memcmp(src, dst+i-1, len-i+1) == 0);
+ // This is an expensive loop, so don't step through every value;
+ // once i gets to a certain size, start doubling.
+ if (i < 16) {
+ i++;
+ } else {
+ i <<= 1;
+ }
}
} else {
dst[0] = '\0';
diff --git a/tests/sys_resource_test.cpp b/tests/sys_resource_test.cpp
index 8cefc65..0b6b6ef 100644
--- a/tests/sys_resource_test.cpp
+++ b/tests/sys_resource_test.cpp
@@ -33,7 +33,8 @@ class SysResourceTest : public ::testing::Test {
virtual void SetUp() {
ASSERT_EQ(0, getrlimit(RLIMIT_CORE, &l32_));
ASSERT_EQ(0, getrlimit64(RLIMIT_CORE, &l64_));
- ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, NULL, &pr_l64_));
+ ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, nullptr, &pr_l32_));
+ ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, nullptr, &pr_l64_));
}
void CheckResourceLimits();
@@ -41,21 +42,28 @@ class SysResourceTest : public ::testing::Test {
protected:
rlimit l32_;
rlimit64 l64_;
+ rlimit pr_l32_;
rlimit64 pr_l64_;
};
void SysResourceTest::CheckResourceLimits() {
ASSERT_EQ(0, getrlimit(RLIMIT_CORE, &l32_));
ASSERT_EQ(0, getrlimit64(RLIMIT_CORE, &l64_));
- ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, NULL, &pr_l64_));
+ ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, nullptr, &pr_l32_));
+ ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, nullptr, &pr_l64_));
+
+ ASSERT_EQ(l32_.rlim_cur, pr_l32_.rlim_cur);
ASSERT_EQ(l64_.rlim_cur, pr_l64_.rlim_cur);
+
if (l64_.rlim_cur == RLIM64_INFINITY) {
ASSERT_EQ(RLIM_INFINITY, l32_.rlim_cur);
} else {
ASSERT_EQ(l64_.rlim_cur, l32_.rlim_cur);
}
+ ASSERT_EQ(l32_.rlim_max, pr_l32_.rlim_max);
ASSERT_EQ(l64_.rlim_max, pr_l64_.rlim_max);
+
if (l64_.rlim_max == RLIM64_INFINITY) {
ASSERT_EQ(RLIM_INFINITY, l32_.rlim_max);
} else {
@@ -88,13 +96,16 @@ TEST_F(SysResourceTest, setrlimit64) {
ASSERT_EQ(456U, l64_.rlim_cur);
}
+TEST_F(SysResourceTest, prlimit) {
+ pr_l32_.rlim_cur = pr_l32_.rlim_max;
+ ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, &pr_l32_, nullptr));
+ CheckResourceLimits();
+ ASSERT_EQ(pr_l32_.rlim_max, pr_l32_.rlim_cur);
+}
+
TEST_F(SysResourceTest, prlimit64) {
pr_l64_.rlim_cur = pr_l64_.rlim_max;
- ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, &pr_l64_, NULL));
+ ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, &pr_l64_, nullptr));
CheckResourceLimits();
ASSERT_EQ(pr_l64_.rlim_max, pr_l64_.rlim_cur);
}
-
-TEST_F(SysResourceTest, prlimit) {
- // prlimit is prlimit64 on LP64 and unimplemented on 32-bit. So we only test prlimit64.
-}
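The removed comment explained that prlimit() used to be unimplemented on 32-bit bionic; the rewritten tests now call it directly on both widths. A minimal query-only usage sketch (assumes the Linux prlimit(2) interface):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/resource.h>

    int main(void) {
        struct rlimit rl;
        /* pid 0 means the calling process; a null new_limit makes this a
           pure read, matching the nullptr arguments used in the test. */
        if (prlimit(0, RLIMIT_CORE, NULL, &rl) == 0)
            printf("soft=%llu hard=%llu\n",
                   (unsigned long long)rl.rlim_cur,
                   (unsigned long long)rl.rlim_max);
        return 0;
    }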