119 files changed, 4964 insertions, 2355 deletions
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp index 4de28d1..ed5b56c 100644 --- a/benchmarks/math_benchmark.cpp +++ b/benchmarks/math_benchmark.cpp @@ -65,6 +65,50 @@ void BM_math_logb::Run(int iters) { StopBenchmarkTiming(); } +BENCHMARK_WITH_ARG(BM_math_isfinite_macro, double)->AT_COMMON_VALS; +void BM_math_isfinite_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += isfinite(v); + } + + StopBenchmarkTiming(); +} + +#if defined(__BIONIC__) +#define test_isfinite __isfinite +#else +#define test_isfinite __finite +#endif +BENCHMARK_WITH_ARG(BM_math_isfinite, double)->AT_COMMON_VALS; +void BM_math_isfinite::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += test_isfinite(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_isinf_macro, double)->AT_COMMON_VALS; +void BM_math_isinf_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += isinf(v); + } + + StopBenchmarkTiming(); +} + BENCHMARK_WITH_ARG(BM_math_isinf, double)->AT_COMMON_VALS; void BM_math_isinf::Run(int iters, double value) { StartBenchmarkTiming(); @@ -78,6 +122,60 @@ void BM_math_isinf::Run(int iters, double value) { StopBenchmarkTiming(); } +BENCHMARK_WITH_ARG(BM_math_isnan_macro, double)->AT_COMMON_VALS; +void BM_math_isnan_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += isnan(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_isnan, double)->AT_COMMON_VALS; +void BM_math_isnan::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += (isnan)(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_isnormal_macro, double)->AT_COMMON_VALS; +void BM_math_isnormal_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += isnormal(v); + } + + StopBenchmarkTiming(); +} + +#if defined(__BIONIC__) +BENCHMARK_WITH_ARG(BM_math_isnormal, double)->AT_COMMON_VALS; +void BM_math_isnormal::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += (__isnormal)(v); + } + + StopBenchmarkTiming(); +} +#endif + BENCHMARK_NO_ARG(BM_math_sin_fast); void BM_math_sin_fast::Run(int iters) { StartBenchmarkTiming(); @@ -134,3 +232,55 @@ void BM_math_fpclassify::Run(int iters, double value) { StopBenchmarkTiming(); } + +BENCHMARK_WITH_ARG(BM_math_signbit_macro, double)->AT_COMMON_VALS; +void BM_math_signbit_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += signbit(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_signbit, double)->AT_COMMON_VALS; +void BM_math_signbit::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += (__signbit)(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_fabs_macro, double)->AT_COMMON_VALS; +void BM_math_fabs_macro::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += fabs(v); + } + + StopBenchmarkTiming(); +} + +BENCHMARK_WITH_ARG(BM_math_fabs, double)->AT_COMMON_VALS; 
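The *_macro/plain benchmark pairs added here rest on a C preprocessor detail: a function-like macro only expands when its name is immediately followed by '(', so writing (isnan)(v) bypasses the <math.h> macro and calls the library's out-of-line function instead. A minimal illustrative C sketch of that distinction (not part of the patch; it assumes the libc also exports these classifiers as real functions, which bionic and glibc do):

#include <math.h>
#include <stdio.h>

int main(void) {
  double v = nan("");                 /* a quiet NaN */
  int via_macro = isnan(v);           /* expands the <math.h> classification macro inline */
  int via_call  = (isnan)(v);         /* parentheses suppress macro expansion: real libc call */
  printf("macro=%d call=%d\n", via_macro, via_call);  /* both nonzero for NaN */
  return 0;
}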
+void BM_math_fabs::Run(int iters, double value) { + StartBenchmarkTiming(); + + d = 0.0; + v = value; + for (int i = 0; i < iters; ++i) { + d += (fabs)(v); + } + + StopBenchmarkTiming(); +} diff --git a/libc/Android.mk b/libc/Android.mk index f0c5e9f..f7f2adc 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -611,6 +611,10 @@ ifneq ($(BOARD_MALLOC_ALIGNMENT),) libc_common_cflags += -DMALLOC_ALIGNMENT=$(BOARD_MALLOC_ALIGNMENT) endif +ifeq ($(BOARD_USES_LEGACY_MMAP),true) + libc_common_cflags += -DLEGACY_MMAP +endif + # Define some common conlyflags libc_common_conlyflags := \ -std=gnu99 @@ -1394,6 +1398,9 @@ LOCAL_SRC_FILES_arm += \ LOCAL_ADDRESS_SANITIZER := false LOCAL_NATIVE_COVERAGE := $(bionic_coverage) +# Allow devices to provide additional symbols +LOCAL_WHOLE_STATIC_LIBRARIES += $(BOARD_PROVIDES_ADDITIONAL_BIONIC_STATIC_LIBS) + include $(BUILD_SHARED_LIBRARY) diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk index d72a160..c2b80c5 100644 --- a/libc/arch-arm/arm.mk +++ b/libc/arch-arm/arm.mk @@ -20,7 +20,6 @@ libc_freebsd_src_files_arm += \ upstream-freebsd/lib/libc/string/wmemmove.c \ libc_openbsd_src_files_arm += \ - upstream-openbsd/lib/libc/string/memchr.c \ upstream-openbsd/lib/libc/string/memrchr.c \ upstream-openbsd/lib/libc/string/stpncpy.c \ upstream-openbsd/lib/libc/string/strlcat.c \ @@ -52,7 +51,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),) endif cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk ifeq ($(wildcard $(cpu_variant_mk)),) -$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") +$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, scorpion, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") endif include $(cpu_variant_mk) libc_common_additional_dependencies += $(cpu_variant_mk) diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S index a2e9c22..3692f04 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,191 +26,7 @@ * SUCH DAMAGE. */ -#include <private/bionic_asm.h> -#include <private/libc_events.h> +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .syntax unified - - .thumb - .thumb_func - -// Get the length of src string, then get the source of the dst string. -// Check that the two lengths together don't exceed the threshold, then -// do a memcpy of the data. -ENTRY(__strcat_chk) - pld [r0, #0] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - push {r4, r5} - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - - mov lr, r2 - - // Save the dst register to r5 - mov r5, r0 - - // Zero out r4 - eor r4, r4, r4 - - // r1 contains the address of the string to count. 
-.L_strlen_start: - mov r0, r1 - ands r3, r1, #7 - beq .L_mainloop - - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq .L_align_to_32 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_32: - bcc .L_align_to_64 - ands ip, r3, #2 - beq .L_align_to_64 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_64: - tst r3, #4 - beq .L_mainloop - ldr r3, [r1], #4 - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - - .p2align 2 -.L_mainloop: - ldrd r2, r3, [r1], #8 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .L_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - b .L_mainloop - -.L_update_count_and_finish: - sub r3, r1, r0 - sub r3, r3, #1 - b .L_finish - -.L_zero_in_first_register: - sub r3, r1, r0 - lsls r2, ip, #17 - bne .L_sub8_and_finish - bcs .L_sub7_and_finish - lsls ip, ip, #1 - bne .L_sub6_and_finish - - sub r3, r3, #5 - b .L_finish - -.L_sub8_and_finish: - sub r3, r3, #8 - b .L_finish - -.L_sub7_and_finish: - sub r3, r3, #7 - b .L_finish - -.L_sub6_and_finish: - sub r3, r3, #6 - b .L_finish - -.L_zero_in_second_register: - sub r3, r1, r0 - lsls r2, ip, #17 - bne .L_sub4_and_finish - bcs .L_sub3_and_finish - lsls ip, ip, #1 - bne .L_sub2_and_finish - - sub r3, r3, #1 - b .L_finish - -.L_sub4_and_finish: - sub r3, r3, #4 - b .L_finish - -.L_sub3_and_finish: - sub r3, r3, #3 - b .L_finish - -.L_sub2_and_finish: - sub r3, r3, #2 - -.L_finish: - cmp r4, #0 - bne .L_strlen_done - - // Time to get the dst string length. - mov r1, r5 - - // Save the original source address to r5. - mov r5, r0 - - // Save the current length (adding 1 for the terminator). - add r4, r3, #1 - b .L_strlen_start - - // r0 holds the pointer to the dst string. - // r3 holds the dst string length. - // r4 holds the src string length + 1. -.L_strlen_done: - add r2, r3, r4 - cmp r2, lr - bhi __strcat_chk_failed - - // Set up the registers for the memcpy code. - mov r1, r5 - pld [r1, #64] - mov r2, r4 - add r0, r0, r3 - pop {r4, r5} -END(__strcat_chk) - -#define MEMCPY_BASE __strcat_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned - -#include "memcpy_base.S" - -ENTRY_PRIVATE(__strcat_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+4) -END(__strcat_chk_failed) - - .data -error_string: - .string "strcat: prevented write past end of buffer" +#include "__strcat_chk_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S new file mode 100644 index 0000000..de66967 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .syntax unified + + .thumb + .thumb_func + +// Get the length of src string, then get the source of the dst string. +// Check that the two lengths together don't exceed the threshold, then +// do a memcpy of the data. +ENTRY(__strcat_chk) + pld [r0, #0] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + push {r4, r5} + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + + mov lr, r2 + + // Save the dst register to r5 + mov r5, r0 + + // Zero out r4 + eor r4, r4, r4 + + // r1 contains the address of the string to count. +.L_strlen_start: + mov r0, r1 + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r1], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r1], #8 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r1, r0 + sub r3, r3, #1 + b .L_finish + +.L_zero_in_first_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_finish + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_finish + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_finish + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_finish + +.L_zero_in_second_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, r3, #1 + b .L_finish + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_finish + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_finish + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_finish: + cmp r4, #0 + bne .L_strlen_done + + // Time to get the dst string length. + mov r1, r5 + + // Save the original source address to r5. 
+ mov r5, r0 + + // Save the current length (adding 1 for the terminator). + add r4, r3, #1 + b .L_strlen_start + + // r0 holds the pointer to the dst string. + // r3 holds the dst string length. + // r4 holds the src string length + 1. +.L_strlen_done: + add r2, r3, r4 + cmp r2, lr + bhi .L_strcat_chk_failed + + // Set up the registers for the memcpy code. + mov r1, r5 + pld [r1, #64] + mov r2, r4 + add r0, r0, r3 + pop {r4, r5} + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 + +#include MEMCPY_BASE + + // Undo the above cfi directives + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 +.L_strcat_chk_failed: + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcat_chk) + + .data +error_string: + .string "strcat: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S index db76686..d8cb3d9 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,155 +26,7 @@ * SUCH DAMAGE. */ -#include <private/bionic_asm.h> -#include <private/libc_events.h> +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .syntax unified - - .thumb - .thumb_func - -// Get the length of the source string first, then do a memcpy of the data -// instead of a strcpy. -ENTRY(__strcpy_chk) - pld [r0, #0] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - mov lr, r2 - mov r0, r1 - - ands r3, r1, #7 - beq .L_mainloop - - // Align to a double word (64 bits). 
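The refactor above replaces the old MEMCPY_BASE/MEMCPY_BASE_ALIGNED entry points with a computed include: each per-CPU front end #defines MEMCPY_BASE to the path of its inner copy loop, and the shared *_common.S files splice it in with #include MEMCPY_BASE. A self-contained C demo of the mechanism (illustrative only; here the computed include just names a standard header, whereas the patch uses it to select a per-CPU memcpy_base.S):

#include <stdio.h>

/* A macro may expand to the operand of #include ("computed include"). */
#define MEMCPY_BASE <string.h>
#include MEMCPY_BASE               /* expands to #include <string.h> before inclusion */

int main(void) {
  printf("%zu\n", strlen("computed include"));
  return 0;
}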
- rsb r3, r3, #8 - lsls ip, r3, #31 - beq .L_align_to_32 - - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_32: - bcc .L_align_to_64 - ands ip, r3, #2 - beq .L_align_to_64 - - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_64: - tst r3, #4 - beq .L_mainloop - ldr r3, [r0], #4 - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - - .p2align 2 -.L_mainloop: - ldrd r2, r3, [r0], #8 - - pld [r0, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .L_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - b .L_mainloop - -.L_update_count_and_finish: - sub r3, r0, r1 - sub r3, r3, #1 - b .L_check_size - -.L_zero_in_first_register: - sub r3, r0, r1 - lsls r2, ip, #17 - bne .L_sub8_and_finish - bcs .L_sub7_and_finish - lsls ip, ip, #1 - bne .L_sub6_and_finish - - sub r3, r3, #5 - b .L_check_size - -.L_sub8_and_finish: - sub r3, r3, #8 - b .L_check_size - -.L_sub7_and_finish: - sub r3, r3, #7 - b .L_check_size - -.L_sub6_and_finish: - sub r3, r3, #6 - b .L_check_size - -.L_zero_in_second_register: - sub r3, r0, r1 - lsls r2, ip, #17 - bne .L_sub4_and_finish - bcs .L_sub3_and_finish - lsls ip, ip, #1 - bne .L_sub2_and_finish - - sub r3, r3, #1 - b .L_check_size - -.L_sub4_and_finish: - sub r3, r3, #4 - b .L_check_size - -.L_sub3_and_finish: - sub r3, r3, #3 - b .L_check_size - -.L_sub2_and_finish: - sub r3, r3, #2 - -.L_check_size: - pld [r1, #0] - pld [r1, #64] - ldr r0, [sp] - cmp r3, lr - bhs __strcpy_chk_failed - - // Add 1 for copy length to get the string terminator. - add r2, r3, #1 -END(__strcpy_chk) - -#define MEMCPY_BASE __strcpy_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned -#include "memcpy_base.S" - -ENTRY_PRIVATE(__strcpy_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+4) -END(__strcpy_chk_failed) - - .data -error_string: - .string "strcpy: prevented write past end of buffer" +#include "__strcpy_chk_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S new file mode 100644 index 0000000..69ebcb4 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .syntax unified + + .thumb + .thumb_func + +// Get the length of the source string first, then do a memcpy of the data +// instead of a strcpy. +ENTRY(__strcpy_chk) + pld [r0, #0] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + + mov lr, r2 + mov r0, r1 + + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r0], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r0], #8 + + pld [r0, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r0, r1 + sub r3, r3, #1 + b .L_check_size + +.L_zero_in_first_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_check_size + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_check_size + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_check_size + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_check_size + +.L_zero_in_second_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, r3, #1 + b .L_check_size + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_check_size + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_check_size + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_check_size: + pld [r1, #0] + pld [r1, #64] + ldr r0, [sp] + cmp r3, lr + bhs .L_strcpy_chk_failed + + // Add 1 for copy length to get the string terminator. + add r2, r3, #1 + +#include MEMCPY_BASE + +.L_strcpy_chk_failed: + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcpy_chk) + + .data +error_string: + .string "strcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S index 410b663..537f3de 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -25,79 +25,8 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Prototype: void *memcpy (void *dst, const void *src, size_t count). - -#include <private/bionic_asm.h> -#include <private/libc_events.h> - - .text - .syntax unified - .fpu neon - -ENTRY(__memcpy_chk) - cmp r2, r3 - bhi __memcpy_chk_fail - - // Fall through to memcpy... -END(__memcpy_chk) - -ENTRY(memcpy) - pld [r1, #64] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 -END(memcpy) - -#define MEMCPY_BASE __memcpy_base -#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned -#include "memcpy_base.S" - -ENTRY_PRIVATE(__memcpy_chk_fail) - // Preserve lr for backtrace. - push {lr} - .cfi_def_cfa_offset 4 - .cfi_rel_offset lr, 0 - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+8) -END(__memcpy_chk_fail) +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .data -error_string: - .string "memcpy: prevented write past end of buffer" +#include "memcpy_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S index 2a73852..aac737d 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S @@ -53,11 +53,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -ENTRY_PRIVATE(MEMCPY_BASE) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - +.L_memcpy_base: // Assumes that n >= 0, and dst, src are valid pointers. // For any sizes less than 832 use the neon code that doesn't // care about the src alignment. 
This avoids any checks @@ -168,12 +164,6 @@ ENTRY_PRIVATE(MEMCPY_BASE) eor r3, r0, r1 ands r3, r3, #0x3 bne .L_copy_unknown_alignment -END(MEMCPY_BASE) - -ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 // To try and improve performance, stack layout changed, // i.e., not keeping the stack looking like users expect @@ -185,7 +175,7 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) strd r6, r7, [sp, #-8]! .cfi_adjust_cfa_offset 8 .cfi_rel_offset r6, 0 - .cfi_rel_offset r7, 0 + .cfi_rel_offset r7, 4 strd r8, r9, [sp, #-8]! .cfi_adjust_cfa_offset 8 .cfi_rel_offset r8, 0 @@ -291,10 +281,28 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // Restore registers: optimized pop {r0, pc} ldrd r8, r9, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r8 + .cfi_restore r9 ldrd r6, r7, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r6 + .cfi_restore r7 ldrd r4, r5, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 pop {r0, pc} + // Put the cfi directives back for the below instructions. + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + .cfi_rel_offset r6, 8 + .cfi_rel_offset r7, 12 + .cfi_rel_offset r8, 16 + .cfi_rel_offset r9, 20 + .L_dst_not_word_aligned: // Align dst to word. rsb ip, ip, #4 @@ -315,4 +323,12 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // Src is guaranteed to be at least word aligned by this point. b .L_word_aligned -END(MEMCPY_BASE_ALIGNED) + + // Undo any cfi directives from above. + .cfi_adjust_cfa_offset -24 + .cfi_restore r4 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore r9 diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_common.S b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S new file mode 100644 index 0000000..464fb46 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .text + .syntax unified + .fpu neon + +ENTRY(__memcpy_chk) + cmp r2, r3 + bhi .L_memcpy_chk_fail + + // Fall through to memcpy... +END(__memcpy_chk) + +// Prototype: void *memcpy (void *dst, const void *src, size_t count). +ENTRY(memcpy) + pld [r1, #64] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + +#include MEMCPY_BASE + + // Undo the cfi instructions from above. + .cfi_def_cfa_offset 0 + .cfi_restore r0 + .cfi_restore lr +.L_memcpy_chk_fail: + // Preserve lr for backtrace. + push {lr} + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(memcpy) + + .data +error_string: + .string "memcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/strcat.S b/libc/arch-arm/cortex-a15/bionic/strcat.S index b95be94..157cc9f 100644 --- a/libc/arch-arm/cortex-a15/bionic/strcat.S +++ b/libc/arch-arm/cortex-a15/bionic/strcat.S @@ -70,7 +70,7 @@ .macro m_scan_byte ldrb r3, [r0] - cbz r3, strcat_r0_scan_done + cbz r3, .L_strcat_r0_scan_done add r0, #1 .endm // m_scan_byte @@ -84,10 +84,10 @@ ENTRY(strcat) // Quick check to see if src is empty. ldrb r2, [r1] pld [r1, #0] - cbnz r2, strcat_continue + cbnz r2, .L_strcat_continue bx lr -strcat_continue: +.L_strcat_continue: // To speed up really small dst strings, unroll checking the first 4 bytes. m_push m_scan_byte @@ -96,95 +96,102 @@ strcat_continue: m_scan_byte ands r3, r0, #7 - beq strcat_mainloop + beq .L_strcat_mainloop // Align to a double word (64 bits). 
rsb r3, r3, #8 lsls ip, r3, #31 - beq strcat_align_to_32 + beq .L_strcat_align_to_32 ldrb r5, [r0] - cbz r5, strcat_r0_scan_done + cbz r5, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_32: - bcc strcat_align_to_64 +.L_strcat_align_to_32: + bcc .L_strcat_align_to_64 ldrb r2, [r0] - cbz r2, strcat_r0_scan_done + cbz r2, .L_strcat_r0_scan_done add r0, r0, #1 ldrb r4, [r0] - cbz r4, strcat_r0_scan_done + cbz r4, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_64: +.L_strcat_align_to_64: tst r3, #4 - beq strcat_mainloop + beq .L_strcat_mainloop ldr r3, [r0], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_r0_scan_done: +.L_strcat_r0_scan_done: // For short copies, hard-code checking the first 8 bytes since this // new code doesn't win until after about 8 bytes. - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue + +.L_strcpy_finish: m_pop -strcpy_continue: +.L_strcpy_continue: ands r3, r0, #7 - beq strcpy_check_src_align + beq .L_strcpy_check_src_align // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcpy_align_to_32 + beq .L_strcpy_align_to_32 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_32: - bcc strcpy_align_to_64 +.L_strcpy_align_to_32: + bcc .L_strcpy_align_to_64 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_64: +.L_strcpy_align_to_64: tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - str r2, [r0], #4 + beq .L_strcpy_check_src_align + // Read one byte at a time since we don't know the src alignment + // and we don't want to read into a different page. + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete -strcpy_check_src_align: +.L_strcpy_check_src_align: // At this point dst is aligned to a double word, check if src // is also aligned to a double word. 
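The change just above (repeated in string_copy.S further down) replaces a 4-byte ldr from src with four ldrb loads while dst is still being aligned: src's alignment is unknown at that point, so an unaligned word load could read past the terminating NUL and fault by touching the next, possibly unmapped, page, whereas byte loads stop exactly at the terminator. A rough C sketch of the idea (hypothetical helper, not bionic code):

#include <stddef.h>

/* Copy at most n bytes, one byte at a time (safe at any src alignment).
 * Returns a pointer just past the copied NUL if the terminator was hit,
 * or NULL if n bytes were copied without hitting it, in which case the
 * caller can continue with an aligned word-at-a-time loop. */
char *copy_bytes_until_nul(char *dst, const char **srcp, size_t n) {
  const char *src = *srcp;
  while (n-- > 0) {
    char c = *src++;
    *dst++ = c;
    if (c == '\0') {
      *srcp = src;
      return dst;
    }
  }
  *srcp = src;
  return NULL;
}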
ands r3, r1, #7 - bne strcpy_unaligned_copy + bne .L_strcpy_unaligned_copy .p2align 2 -strcpy_mainloop: +.L_strcpy_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -192,128 +199,128 @@ strcpy_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_mainloop + b .L_strcpy_mainloop -strcpy_complete: +.L_strcpy_complete: m_pop -strcpy_zero_in_first_register: +.L_strcpy_zero_in_first_register: lsls lr, ip, #17 - bne strcpy_copy1byte - bcs strcpy_copy2bytes + bne .L_strcpy_copy1byte + bcs .L_strcpy_copy2bytes lsls ip, ip, #1 - bne strcpy_copy3bytes + bne .L_strcpy_copy3bytes -strcpy_copy4bytes: +.L_strcpy_copy4bytes: // Copy 4 bytes to the destiniation. str r2, [r0] m_pop -strcpy_copy1byte: +.L_strcpy_copy1byte: strb r2, [r0] m_pop -strcpy_copy2bytes: +.L_strcpy_copy2bytes: strh r2, [r0] m_pop -strcpy_copy3bytes: +.L_strcpy_copy3bytes: strh r2, [r0], #2 lsr r2, #16 strb r2, [r0] m_pop -strcpy_zero_in_second_register: +.L_strcpy_zero_in_second_register: lsls lr, ip, #17 - bne strcpy_copy5bytes - bcs strcpy_copy6bytes + bne .L_strcpy_copy5bytes + bcs .L_strcpy_copy6bytes lsls ip, ip, #1 - bne strcpy_copy7bytes + bne .L_strcpy_copy7bytes // Copy 8 bytes to the destination. strd r2, r3, [r0] m_pop -strcpy_copy5bytes: +.L_strcpy_copy5bytes: str r2, [r0], #4 strb r3, [r0] m_pop -strcpy_copy6bytes: +.L_strcpy_copy6bytes: str r2, [r0], #4 strh r3, [r0] m_pop -strcpy_copy7bytes: +.L_strcpy_copy7bytes: str r2, [r0], #4 strh r3, [r0], #2 lsr r3, #16 strb r3, [r0] m_pop -strcpy_unaligned_copy: +.L_strcpy_unaligned_copy: // Dst is aligned to a double word, while src is at an unknown alignment. // There are 7 different versions of the unaligned copy code // to prevent overreading the src. The mainloop of every single version // will store 64 bits per loop. The difference is how much of src can // be read without potentially crossing a page boundary. tbb [pc, r3] -strcpy_unaligned_branchtable: +.L_strcpy_unaligned_branchtable: .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2) .p2align 2 // Can read 7 bytes before possibly crossing a page. 
-strcpy_unalign7: +.L_strcpy_unalign7: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes + cbz r3, .L_strcpy_unalign7_copy5bytes ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes + cbz r4, .L_strcpy_unalign7_copy6bytes ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes + cbz r5, .L_strcpy_unalign7_copy7bytes ldr r3, [r1], #4 pld [r1, #64] lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign7 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign7 -strcpy_unalign7_copy5bytes: +.L_strcpy_unalign7_copy5bytes: str r2, [r0], #4 strb r3, [r0] -strcpy_unalign_return: +.L_strcpy_unalign_return: m_pop -strcpy_unalign7_copy6bytes: +.L_strcpy_unalign7_copy6bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 m_pop -strcpy_unalign7_copy7bytes: +.L_strcpy_unalign7_copy7bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 @@ -322,41 +329,41 @@ strcpy_unalign7_copy7bytes: .p2align 2 // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: +.L_strcpy_unalign6: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes + cbz r5, .L_strcpy_unalign_copy6bytes ldr r3, [r1], #4 pld [r1, #64] tst r3, #0xff0000 - beq strcpy_copy7bytes + beq .L_strcpy_copy7bytes lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign6 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign6 .p2align 2 // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: +.L_strcpy_unalign5: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldr r3, [r1], #4 @@ -365,17 +372,17 @@ strcpy_unalign5: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign5 + b .L_strcpy_unalign5 -strcpy_unalign_copy5bytes: +.L_strcpy_unalign_copy5bytes: str r2, [r0], #4 strb r4, [r0] m_pop -strcpy_unalign_copy6bytes: +.L_strcpy_unalign_copy6bytes: str r2, [r0], #4 strb r4, [r0], #1 strb r5, [r0] @@ -383,13 +390,13 @@ strcpy_unalign_copy6bytes: .p2align 2 // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: +.L_strcpy_unalign4: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldr r3, [r1], #4 pld [r1, #64] @@ -397,20 +404,20 @@ strcpy_unalign4: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign4 + b .L_strcpy_unalign4 .p2align 2 // Can read 3 bytes before possibly crossing a page. 
-strcpy_unalign3: +.L_strcpy_unalign3: ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte + cbz r2, .L_strcpy_unalign3_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes + cbz r3, .L_strcpy_unalign3_copy2bytes ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes + cbz r4, .L_strcpy_unalign3_copy3bytes ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -418,26 +425,26 @@ strcpy_unalign3: pld [r1, #64] lsrs lr, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign3 + b .L_strcpy_unalign3 -strcpy_unalign3_copy1byte: +.L_strcpy_unalign3_copy1byte: strb r2, [r0] m_pop -strcpy_unalign3_copy2bytes: +.L_strcpy_unalign3_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_pop -strcpy_unalign3_copy3bytes: +.L_strcpy_unalign3_copy3bytes: strb r2, [r0], #1 strb r3, [r0], #1 strb r4, [r0] @@ -445,34 +452,34 @@ strcpy_unalign3_copy3bytes: .p2align 2 // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: +.L_strcpy_unalign2: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldrb r4, [r1, #1] - cbz r4, strcpy_unalign_copy2bytes + cbz r4, .L_strcpy_unalign_copy2bytes ldr r2, [r1], #4 ldr r3, [r1], #4 pld [r1, #64] tst r2, #0xff0000 - beq strcpy_copy3bytes + beq .L_strcpy_copy3bytes lsrs ip, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign2 + b .L_strcpy_unalign2 .p2align 2 // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: +.L_strcpy_unalign1: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -482,27 +489,27 @@ strcpy_unalign1: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign1 + b .L_strcpy_unalign1 -strcpy_unalign_copy1byte: +.L_strcpy_unalign_copy1byte: strb r2, [r0] m_pop -strcpy_unalign_copy2bytes: +.L_strcpy_unalign_copy2bytes: strb r2, [r0], #1 strb r4, [r0] m_pop .p2align 2 -strcat_mainloop: +.L_strcat_mainloop: ldrd r2, r3, [r0], #8 pld [r0, #64] @@ -510,59 +517,59 @@ strcat_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcat_zero_in_first_register + bne .L_strcat_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_zero_in_first_register: +.L_strcat_zero_in_first_register: // Prefetch the src now, it's going to be used soon. 
pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub8 - bcs strcat_sub7 + bne .L_strcat_sub8 + bcs .L_strcat_sub7 lsls ip, ip, #1 - bne strcat_sub6 + bne .L_strcat_sub6 sub r0, r0, #5 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub8: +.L_strcat_sub8: sub r0, r0, #8 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub7: +.L_strcat_sub7: sub r0, r0, #7 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub6: +.L_strcat_sub6: sub r0, r0, #6 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_zero_in_second_register: +.L_strcat_zero_in_second_register: // Prefetch the src now, it's going to be used soon. pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub4 - bcs strcat_sub3 + bne .L_strcat_sub4 + bcs .L_strcat_sub3 lsls ip, ip, #1 - bne strcat_sub2 + bne .L_strcat_sub2 sub r0, r0, #1 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub4: +.L_strcat_sub4: sub r0, r0, #4 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub3: +.L_strcat_sub3: sub r0, r0, #3 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub2: +.L_strcat_sub2: sub r0, r0, #2 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done END(strcat) diff --git a/libc/arch-arm/cortex-a15/bionic/string_copy.S b/libc/arch-arm/cortex-a15/bionic/string_copy.S index 20f0e91..92d1c98 100644 --- a/libc/arch-arm/cortex-a15/bionic/string_copy.S +++ b/libc/arch-arm/cortex-a15/bionic/string_copy.S @@ -149,13 +149,20 @@ ENTRY(strcpy) .Lstringcopy_align_to_64: tst r3, #4 beq .Lstringcopy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .Lstringcopy_zero_in_first_register - str r2, [r0], #4 + // Read one byte at a time since we don't have any idea about the alignment + // of the source and we don't want to read into a different page. + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete .Lstringcopy_check_src_align: // At this point dst is aligned to a double word, check if src diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S index 9a0ce62..4fd6284 100644 --- a/libc/arch-arm/cortex-a15/bionic/strlen.S +++ b/libc/arch-arm/cortex-a15/bionic/strlen.S @@ -65,38 +65,38 @@ ENTRY(strlen) mov r1, r0 ands r3, r0, #7 - beq mainloop + beq .L_mainloop // Align to a double word (64 bits). 
rsb r3, r3, #8 lsls ip, r3, #31 - beq align_to_32 + beq .L_align_to_32 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_32: - bcc align_to_64 +.L_align_to_32: + bcc .L_align_to_64 ands ip, r3, #2 - beq align_to_64 + beq .L_align_to_64 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_64: +.L_align_to_64: tst r3, #4 - beq mainloop + beq .L_mainloop ldr r3, [r1], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register + bne .L_zero_in_second_register .p2align 2 -mainloop: +.L_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -104,62 +104,62 @@ mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne zero_in_first_register + bne .L_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register - b mainloop + bne .L_zero_in_second_register + b .L_mainloop -update_count_and_return: +.L_update_count_and_return: sub r0, r1, r0 sub r0, r0, #1 bx lr -zero_in_first_register: +.L_zero_in_first_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub8_and_return - bcs sub7_and_return + bne .L_sub8_and_return + bcs .L_sub7_and_return lsls ip, ip, #1 - bne sub6_and_return + bne .L_sub6_and_return sub r0, r0, #5 bx lr -sub8_and_return: +.L_sub8_and_return: sub r0, r0, #8 bx lr -sub7_and_return: +.L_sub7_and_return: sub r0, r0, #7 bx lr -sub6_and_return: +.L_sub6_and_return: sub r0, r0, #6 bx lr -zero_in_second_register: +.L_zero_in_second_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub4_and_return - bcs sub3_and_return + bne .L_sub4_and_return + bcs .L_sub3_and_return lsls ip, ip, #1 - bne sub2_and_return + bne .L_sub2_and_return sub r0, r0, #1 bx lr -sub4_and_return: +.L_sub4_and_return: sub r0, r0, #4 bx lr -sub3_and_return: +.L_sub3_and_return: sub r0, r0, #3 bx lr -sub2_and_return: +.L_sub2_and_return: sub r0, r0, #2 bx lr END(strlen) diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk index 6fa3270..202a3bf 100644 --- a/libc/arch-arm/cortex-a15/cortex-a15.mk +++ b/libc/arch-arm/cortex-a15/cortex-a15.mk @@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a15/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ libc_bionic_src_files_arm += \ diff --git a/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk new file mode 100644 index 0000000..5d7efc6 --- /dev/null +++ b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk @@ -0,0 +1,22 @@ +# This file represents the best optimized routines that are the middle +# ground when running on a big/little system that is cortex-a57/cortex-a53. +# The cortex-a7 optimized routines, and the cortex-a53 optimized routines +# decrease performance on cortex-a57 processors by as much as 20%. 
+ +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/memset.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memcmp.S \ + arch-arm/generic/bionic/memchr.S + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S new file mode 100644 index 0000000..c5bc98a --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/__strcat_chk_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S new file mode 100644 index 0000000..1f8945d --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/__strcpy_chk_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy.S b/libc/arch-arm/cortex-a53/bionic/memcpy.S new file mode 100644 index 0000000..664f574 --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/memcpy.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/memcpy_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy_base.S b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S new file mode 100644 index 0000000..2749fc8 --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +.L_memcpy_base: + // Assumes that n >= 0, and dst, src are valid pointers. + cmp r2, #16 + blo .L_copy_less_than_16_unknown_align + +.L_copy_unknown_alignment: + // Unknown alignment of src and dst. + // Assumes that the first few bytes have already been prefetched. + + // Align destination to 128 bits. The mainloop store instructions + // require this alignment or they will throw an exception. + rsb r3, r0, #0 + ands r3, r3, #0xF + beq 2f + + // Copy up to 15 bytes (count in r3). + sub r2, r2, r3 + movs ip, r3, lsl #31 + + itt mi + ldrbmi lr, [r1], #1 + strbmi lr, [r0], #1 + itttt cs + ldrbcs ip, [r1], #1 + ldrbcs lr, [r1], #1 + strbcs ip, [r0], #1 + strbcs lr, [r0], #1 + + movs ip, r3, lsl #29 + bge 1f + // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after. + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! +1: bcc 2f + // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after. + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0, :64]! + +2: // Make sure we have at least 64 bytes to copy. + subs r2, r2, #64 + blo 2f + +1: // The main loop copies 64 bytes at a time. + vld1.8 {d0 - d3}, [r1]! + vld1.8 {d4 - d7}, [r1]! 
+ subs r2, r2, #64 + vstmia r0!, {d0 - d7} + pld [r1, #(64*10)] + bhs 1b + +2: // Fix-up the remaining count and make sure we have >= 32 bytes left. + adds r2, r2, #32 + blo 3f + + // 32 bytes. These cache lines were already preloaded. + vld1.8 {d0 - d3}, [r1]! + sub r2, r2, #32 + vst1.8 {d0 - d3}, [r0, :128]! +3: // Less than 32 left. + add r2, r2, #32 + tst r2, #0x10 + beq .L_copy_less_than_16_unknown_align + // Copies 16 bytes, destination 128 bits aligned. + vld1.8 {d0, d1}, [r1]! + vst1.8 {d0, d1}, [r0, :128]! + +.L_copy_less_than_16_unknown_align: + // Copy up to 15 bytes (count in r2). + movs ip, r2, lsl #29 + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: bge 2f + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! + +2: // Copy 0 to 4 bytes. + lsls r2, r2, #31 + itt ne + ldrbne lr, [r1], #1 + strbne lr, [r0], #1 + itttt cs + ldrbcs ip, [r1], #1 + ldrbcs lr, [r1] + strbcs ip, [r0], #1 + strbcs lr, [r0] + + pop {r0, pc} diff --git a/libc/arch-arm/cortex-a53/cortex-a53.mk b/libc/arch-arm/cortex-a53/cortex-a53.mk index b5c337c..14aaa71 100644 --- a/libc/arch-arm/cortex-a53/cortex-a53.mk +++ b/libc/arch-arm/cortex-a53/cortex-a53.mk @@ -1 +1,21 @@ -include bionic/libc/arch-arm/cortex-a7/cortex-a7.mk +libc_bionic_src_files_arm += \ + arch-arm/cortex-a53/bionic/memcpy.S \ + arch-arm/cortex-a53/bionic/__strcat_chk.S \ + arch-arm/cortex-a53/bionic/__strcpy_chk.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a7/bionic/memset.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S \ + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a7/bionic/memset.S b/libc/arch-arm/cortex-a7/bionic/memset.S new file mode 100644 index 0000000..6365b06 --- /dev/null +++ b/libc/arch-arm/cortex-a7/bionic/memset.S @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
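The new cortex-a7 memset introduced here follows the usual bionic layout: __memset_chk rejects writes past the end of the buffer and calls __fortify_chk_fail, bzero rewrites its arguments and falls through, and memset itself keeps the destination in a scratch register (r3) so the required return value, the original dst, never leaves r0 and no stack save is needed (the cortex-a9 and krait memset further down get the same treatment). The bulk is a 64-byte NEON store loop followed by progressively smaller tails selected from bits of the remaining count. A rough C outline of that shape, as a hedged sketch only (memset_sketch is a hypothetical name; the real code uses vdup.8/vstmia and condition flags rather than ifs):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void *memset_sketch(void *dst, int c, size_t n) {
        unsigned char *p = dst;                          /* scratch pointer, like r3 */
        unsigned char b = (unsigned char)c;
        uint64_t pattern = 0x0101010101010101ULL * b;    /* replicate the byte, like vdup.8 q0, r1 */

        while (n >= 64) {                                /* main loop: 64 bytes per iteration */
            for (int i = 0; i < 8; ++i)
                memcpy(p + 8 * i, &pattern, 8);
            p += 64;
            n -= 64;
        }
        for (size_t chunk = 32; chunk >= 8; chunk /= 2) {  /* peel 32-, 16-, 8-byte tails */
            if (n & chunk) {
                for (size_t i = 0; i < chunk; i += 8)
                    memcpy(p + i, &pattern, 8);
                p += chunk;
            }
        }
        for (size_t i = 0; i < (n & 7); ++i)             /* final 0-7 bytes */
            p[i] = b;
        return dst;                                      /* dst (r0) was never modified */
    }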
+ */ + +#include <machine/cpu-features.h> +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + /* + * Optimized memset() for ARM. + * + * memset() returns its first argument. + */ + + .fpu neon + .syntax unified + +ENTRY(__memset_chk) + cmp r2, r3 + bls .L_done + + // Preserve lr for backtrace. + push {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(__memset_chk) + +ENTRY(bzero) + mov r2, r1 + mov r1, #0 +.L_done: + // Fall through to memset... +END(bzero) + +ENTRY(memset) + mov r3, r0 + // At this point only d0, d1 are going to be used below. + vdup.8 q0, r1 + cmp r2, #16 + blo .L_set_less_than_16_unknown_align + +.L_check_alignment: + // Align destination to a double word to avoid the store crossing + // a cache line boundary. + ands ip, r3, #7 + bne .L_do_double_word_align + +.L_double_word_aligned: + // Duplicate since the less than 64 can use d2, d3. + vmov q1, q0 + subs r2, #64 + blo .L_set_less_than_64 + + // Duplicate the copy value so that we can store 64 bytes at a time. + vmov q2, q0 + vmov q3, q0 + +1: // Main loop stores 64 bytes at a time. + subs r2, #64 + vstmia r3!, {d0 - d7} + bge 1b + +.L_set_less_than_64: + // Restore r2 to the count of bytes left to set. + add r2, #64 + lsls ip, r2, #27 + bcc .L_set_less_than_32 + // Set 32 bytes. + vstmia r3!, {d0 - d3} + +.L_set_less_than_32: + bpl .L_set_less_than_16 + // Set 16 bytes. + vstmia r3!, {d0, d1} + +.L_set_less_than_16: + // Less than 16 bytes to set. + lsls ip, r2, #29 + bcc .L_set_less_than_8 + + // Set 8 bytes. + vstmia r3!, {d0} + +.L_set_less_than_8: + bpl .L_set_less_than_4 + // Set 4 bytes + vst1.32 {d0[0]}, [r3]! + +.L_set_less_than_4: + lsls ip, r2, #31 + it ne + strbne r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3] + bx lr + +.L_do_double_word_align: + rsb ip, ip, #8 + sub r2, r2, ip + + // Do this comparison now, otherwise we'll need to save a + // register to the stack since we've used all available + // registers. + cmp ip, #4 + blo 1f + + // Need to do a four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + vst1.32 {d0[0]}, [r3]! + b .L_double_word_aligned + +1: + // No four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + b .L_double_word_aligned + +.L_set_less_than_16_unknown_align: + // Set up to 15 bytes. + movs ip, r2, lsl #29 + bcc 1f + vst1.8 {d0}, [r3]! +1: bge 2f + vst1.32 {d0[0]}, [r3]! 
+2: movs ip, r2, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + bx lr +END(memset) + + .data +error_string: + .string "memset: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a7/cortex-a7.mk b/libc/arch-arm/cortex-a7/cortex-a7.mk index 9af03d9..3629a57 100644 --- a/libc/arch-arm/cortex-a7/cortex-a7.mk +++ b/libc/arch-arm/cortex-a7/cortex-a7.mk @@ -1 +1,19 @@ -include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk +libc_bionic_src_files_arm += \ + arch-arm/cortex-a7/bionic/memset.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S \ + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S index 5e81305..6ab5a69 100644 --- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S @@ -133,8 +133,7 @@ ENTRY_PRIVATE(MEMCPY_BASE) strbcs ip, [r0], #1 strbcs lr, [r0], #1 - ldmfd sp!, {r0, lr} - bx lr + ldmfd sp!, {r0, pc} END(MEMCPY_BASE) ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S index 8ee6ac2..b39fcc4 100644 --- a/libc/arch-arm/cortex-a9/bionic/memset.S +++ b/libc/arch-arm/cortex-a9/bionic/memset.S @@ -69,12 +69,9 @@ END(bzero) ENTRY(memset) // The neon memset only wins for less than 132. cmp r2, #132 - bhi __memset_large_copy - - stmfd sp!, {r0} - .cfi_def_cfa_offset 4 - .cfi_rel_offset r0, 0 + bhi .L_memset_large_copy + mov r3, r0 vdup.8 q0, r1 /* make sure we have at least 32 bytes to write */ @@ -84,7 +81,7 @@ ENTRY(memset) 1: /* The main loop writes 32 bytes at a time */ subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! + vst1.8 {d0 - d3}, [r3]! bhs 1b 2: /* less than 32 left */ @@ -93,22 +90,20 @@ ENTRY(memset) beq 3f // writes 16 bytes, 128-bits aligned - vst1.8 {d0, d1}, [r0]! + vst1.8 {d0, d1}, [r3]! 3: /* write up to 15-bytes (count in r2) */ movs ip, r2, lsl #29 bcc 1f - vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r3]! 1: bge 2f - vst1.32 {d0[0]}, [r0]! + vst1.32 {d0[0]}, [r3]! 
2: movs ip, r2, lsl #31 - strbmi r1, [r0], #1 - strbcs r1, [r0], #1 - strbcs r1, [r0], #1 - ldmfd sp!, {r0} + strbmi r1, [r3], #1 + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 bx lr -END(memset) -ENTRY_PRIVATE(__memset_large_copy) +.L_memset_large_copy: /* compute the offset to align the destination * offset = (4-(src&3))&3 = -src & 3 */ @@ -136,8 +131,7 @@ ENTRY_PRIVATE(__memset_large_copy) strbcs r1, [r0], #1 strbmi r1, [r0], #1 subs r2, r2, r3 - popls {r0, r4-r7, lr} /* return */ - bxls lr + popls {r0, r4-r7, pc} /* return */ /* align the destination to a cache-line */ mov r12, r1 @@ -180,9 +174,8 @@ ENTRY_PRIVATE(__memset_large_copy) strhmi r1, [r0], #2 movs r2, r2, lsl #2 strbcs r1, [r0] - ldmfd sp!, {r0, r4-r7, lr} - bx lr -END(__memset_large_copy) + ldmfd sp!, {r0, r4-r7, pc} +END(memset) .data error_string: diff --git a/libc/arch-arm/cortex-a9/bionic/strcat.S b/libc/arch-arm/cortex-a9/bionic/strcat.S index f5a855e..9077a74 100644 --- a/libc/arch-arm/cortex-a9/bionic/strcat.S +++ b/libc/arch-arm/cortex-a9/bionic/strcat.S @@ -70,7 +70,7 @@ .macro m_scan_byte ldrb r3, [r0] - cbz r3, strcat_r0_scan_done + cbz r3, .Lstrcat_r0_scan_done add r0, #1 .endm // m_scan_byte @@ -84,10 +84,10 @@ ENTRY(strcat) // Quick check to see if src is empty. ldrb r2, [r1] pld [r1, #0] - cbnz r2, strcat_continue + cbnz r2, .Lstrcat_continue bx lr -strcat_continue: +.Lstrcat_continue: // To speed up really small dst strings, unroll checking the first 4 bytes. m_push m_scan_byte @@ -96,10 +96,10 @@ strcat_continue: m_scan_byte ands r3, r0, #7 - bne strcat_align_src + bne .Lstrcat_align_src .p2align 2 -strcat_mainloop: +.Lstrcat_mainloop: ldmia r0!, {r2, r3} pld [r0, #64] @@ -107,28 +107,28 @@ strcat_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcat_zero_in_first_register + bne .Lstrcat_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .Lstrcat_zero_in_second_register + b .Lstrcat_mainloop -strcat_zero_in_first_register: +.Lstrcat_zero_in_first_register: sub r0, r0, #4 -strcat_zero_in_second_register: +.Lstrcat_zero_in_second_register: // Check for zero in byte 0. tst ip, #0x80 it ne subne r0, r0, #4 - bne strcat_r0_scan_done + bne .Lstrcat_r0_scan_done // Check for zero in byte 1. tst ip, #0x8000 it ne subne r0, r0, #3 - bne strcat_r0_scan_done + bne .Lstrcat_r0_scan_done // Check for zero in byte 2. tst ip, #0x800000 it ne @@ -137,33 +137,33 @@ strcat_zero_in_second_register: // Zero is in byte 3. subeq r0, r0, #1 -strcat_r0_scan_done: +.Lstrcat_r0_scan_done: // Unroll the first 8 bytes that will be copied. 
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: + m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue + +.Lstrcpy_finish: m_ret inst=pop -strcpy_continue: +.Lstrcpy_continue: pld [r1, #0] ands r3, r0, #7 - bne strcpy_align_dst + bne .Lstrcpy_align_dst -strcpy_check_src_align: +.Lstrcpy_check_src_align: // At this point dst is aligned to a double word, check if src // is also aligned to a double word. ands r3, r1, #7 - bne strcpy_unaligned_copy + bne .Lstrcpy_unaligned_copy .p2align 2 -strcpy_mainloop: +.Lstrcpy_mainloop: ldmia r1!, {r2, r3} pld [r1, #64] @@ -171,17 +171,17 @@ strcpy_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_mainloop + b .Lstrcpy_mainloop -strcpy_zero_in_first_register: +.Lstrcpy_zero_in_first_register: lsls lr, ip, #17 itt ne strbne r2, [r0] @@ -198,7 +198,7 @@ strcpy_zero_in_first_register: strb r3, [r0] m_ret inst=pop -strcpy_zero_in_second_register: +.Lstrcpy_zero_in_second_register: lsls lr, ip, #17 ittt ne stmiane r0!, {r2} @@ -218,18 +218,18 @@ strcpy_zero_in_second_register: strb r4, [r0] m_ret inst=pop -strcpy_align_dst: +.Lstrcpy_align_dst: // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcpy_align_to_32 + beq .Lstrcpy_align_to_32 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .Lstrcpy_complete -strcpy_align_to_32: - bcc strcpy_align_to_64 +.Lstrcpy_align_to_32: + bcc .Lstrcpy_align_to_64 ldrb r4, [r1], #1 strb r4, [r0], #1 @@ -242,76 +242,83 @@ strcpy_align_to_32: it eq m_ret inst=popeq -strcpy_align_to_64: +.Lstrcpy_align_to_64: tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - stmia r0!, {r2} - b strcpy_check_src_align + beq .Lstrcpy_check_src_align + // Read one byte at a time since we don't know the src alignment + // and we don't want to read into a different page. + ldrb r4, [r1], #1 + strb r4, [r0], #1 + cbz r4, .Lstrcpy_complete + ldrb r5, [r1], #1 + strb r5, [r0], #1 + cbz r5, .Lstrcpy_complete + ldrb r4, [r1], #1 + strb r4, [r0], #1 + cbz r4, .Lstrcpy_complete + ldrb r5, [r1], #1 + strb r5, [r0], #1 + cbz r5, .Lstrcpy_complete + b .Lstrcpy_check_src_align -strcpy_complete: +.Lstrcpy_complete: m_ret inst=pop -strcpy_unaligned_copy: +.Lstrcpy_unaligned_copy: // Dst is aligned to a double word, while src is at an unknown alignment. // There are 7 different versions of the unaligned copy code // to prevent overreading the src. 
The mainloop of every single version // will store 64 bits per loop. The difference is how much of src can // be read without potentially crossing a page boundary. tbb [pc, r3] -strcpy_unaligned_branchtable: +.Lstrcpy_unaligned_branchtable: .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2) .p2align 2 // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: +.Lstrcpy_unalign7: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes + cbz r3, .Lstrcpy_unalign7_copy5bytes ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes + cbz r4, .Lstrcpy_unalign7_copy6bytes ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes + cbz r5, .Lstrcpy_unalign7_copy7bytes ldr r3, [r1], #4 pld [r1, #64] lsrs ip, r3, #24 stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign7 + beq .Lstrcpy_unalign_return + b .Lstrcpy_unalign7 -strcpy_unalign7_copy5bytes: +.Lstrcpy_unalign7_copy5bytes: stmia r0!, {r2} strb r3, [r0] -strcpy_unalign_return: +.Lstrcpy_unalign_return: m_ret inst=pop -strcpy_unalign7_copy6bytes: +.Lstrcpy_unalign7_copy6bytes: stmia r0!, {r2} strb r3, [r0], #1 strb r4, [r0], #1 m_ret inst=pop -strcpy_unalign7_copy7bytes: +.Lstrcpy_unalign7_copy7bytes: stmia r0!, {r2} strb r3, [r0], #1 strb r4, [r0], #1 @@ -320,30 +327,30 @@ strcpy_unalign7_copy7bytes: .p2align 2 // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: +.Lstrcpy_unalign6: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .Lstrcpy_unalign_copy5bytes ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes + cbz r5, .Lstrcpy_unalign_copy6bytes ldr r3, [r1], #4 pld [r1, #64] tst r3, #0xff0000 - beq strcpy_unalign6_copy7bytes + beq .Lstrcpy_unalign6_copy7bytes lsrs ip, r3, #24 stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign6 + beq .Lstrcpy_unalign_return + b .Lstrcpy_unalign6 -strcpy_unalign6_copy7bytes: +.Lstrcpy_unalign6_copy7bytes: stmia r0!, {r2} strh r3, [r0], #2 lsr r3, #16 @@ -352,16 +359,16 @@ strcpy_unalign6_copy7bytes: .p2align 2 // Can read 5 bytes before possibly crossing a page. 
-strcpy_unalign5: +.Lstrcpy_unalign5: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .Lstrcpy_unalign_copy5bytes ldr r3, [r1], #4 @@ -370,17 +377,17 @@ strcpy_unalign5: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign5 + b .Lstrcpy_unalign5 -strcpy_unalign_copy5bytes: +.Lstrcpy_unalign_copy5bytes: stmia r0!, {r2} strb r4, [r0] m_ret inst=pop -strcpy_unalign_copy6bytes: +.Lstrcpy_unalign_copy6bytes: stmia r0!, {r2} strb r4, [r0], #1 strb r5, [r0] @@ -388,13 +395,13 @@ strcpy_unalign_copy6bytes: .p2align 2 // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: +.Lstrcpy_unalign4: ldmia r1!, {r2} sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldmia r1!, {r3} pld [r1, #64] @@ -402,20 +409,20 @@ strcpy_unalign4: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign4 + b .Lstrcpy_unalign4 .p2align 2 // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: +.Lstrcpy_unalign3: ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte + cbz r2, .Lstrcpy_unalign3_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes + cbz r3, .Lstrcpy_unalign3_copy2bytes ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes + cbz r4, .Lstrcpy_unalign3_copy3bytes ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -423,26 +430,26 @@ strcpy_unalign3: pld [r1, #64] lsrs lr, r2, #24 - beq strcpy_unalign_copy4bytes + beq .Lstrcpy_unalign_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign3 + b .Lstrcpy_unalign3 -strcpy_unalign3_copy1byte: +.Lstrcpy_unalign3_copy1byte: strb r2, [r0] m_ret inst=pop -strcpy_unalign3_copy2bytes: +.Lstrcpy_unalign3_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_ret inst=pop -strcpy_unalign3_copy3bytes: +.Lstrcpy_unalign3_copy3bytes: strb r2, [r0], #1 strb r3, [r0], #1 strb r4, [r0] @@ -450,34 +457,34 @@ strcpy_unalign3_copy3bytes: .p2align 2 // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: +.Lstrcpy_unalign2: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .Lstrcpy_unalign_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign_copy2bytes + cbz r3, .Lstrcpy_unalign_copy2bytes ldr r2, [r1], #4 ldr r3, [r1], #4 pld [r1, #64] tst r2, #0xff0000 - beq strcpy_unalign_copy3bytes + beq .Lstrcpy_unalign_copy3bytes lsrs ip, r2, #24 - beq strcpy_unalign_copy4bytes + beq .Lstrcpy_unalign_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign2 + b .Lstrcpy_unalign2 .p2align 2 // Can read 1 byte before possibly crossing a page. 
-strcpy_unalign1: +.Lstrcpy_unalign1: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .Lstrcpy_unalign_copy1byte ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -487,62 +494,62 @@ strcpy_unalign1: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign1 + b .Lstrcpy_unalign1 -strcpy_unalign_copy1byte: +.Lstrcpy_unalign_copy1byte: strb r2, [r0] m_ret inst=pop -strcpy_unalign_copy2bytes: +.Lstrcpy_unalign_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_ret inst=pop -strcpy_unalign_copy3bytes: +.Lstrcpy_unalign_copy3bytes: strh r2, [r0], #2 lsr r2, #16 strb r2, [r0] m_ret inst=pop -strcpy_unalign_copy4bytes: +.Lstrcpy_unalign_copy4bytes: stmia r0, {r2} m_ret inst=pop -strcat_align_src: +.Lstrcat_align_src: // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcat_align_to_32 + beq .Lstrcat_align_to_32 ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update -strcat_align_to_32: - bcc strcat_align_to_64 +.Lstrcat_align_to_32: + bcc .Lstrcat_align_to_64 ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update -strcat_align_to_64: +.Lstrcat_align_to_64: tst r3, #4 - beq strcat_mainloop + beq .Lstrcat_mainloop ldr r3, [r0], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .Lstrcat_zero_in_second_register + b .Lstrcat_mainloop -strcat_r0_update: +.Lstrcat_r0_update: sub r0, r0, #1 - b strcat_r0_scan_done + b .Lstrcat_r0_scan_done END(strcat) diff --git a/libc/arch-arm/cortex-a9/bionic/string_copy.S b/libc/arch-arm/cortex-a9/bionic/string_copy.S index caf5a11..642db0f 100644 --- a/libc/arch-arm/cortex-a9/bionic/string_copy.S +++ b/libc/arch-arm/cortex-a9/bionic/string_copy.S @@ -244,13 +244,20 @@ ENTRY(strcpy) .Lstringcopy_align_to_64: tst r3, #4 beq .Lstringcopy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .Lstringcopy_zero_in_first_register - stmia r0!, {r2} + // Read one byte at a time since we don't have any idea about the alignment + // of the source and we don't want to read into a different page. 
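The byte loads that follow this comment replace a whole-word read for a subtle reason: at this point only the destination has been aligned, the source alignment is unknown, so a 32-bit load could run past the string's terminating NUL into the next, possibly unmapped, page. Once both pointers are aligned, these routines do read whole words and detect the NUL with the carry trick used throughout the strcat/strcpy code above (subtract 0x01010101, bic with the original word, mask with 0x80808080): the result is non-zero exactly when some byte of the word is zero. A small C rendering of the idiom and a toy word-at-a-time strlen built on it, offered as a hedged sketch with hypothetical names:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Non-zero iff some byte of w is zero: the C form of
     *   sub ip, rX, #0x01010101 ; bic ip, ip, rX ; ands ip, ip, #0x80808080   */
    static inline uint32_t has_zero_byte(uint32_t w) {
        return (w - 0x01010101u) & ~w & 0x80808080u;
    }

    /* Toy word-at-a-time strlen built on the test.  It assumes s is 4-byte
     * aligned and leans on the page-granularity argument the assembly uses
     * (an aligned word load never crosses into the next page), which the C
     * standard itself does not guarantee -- illustrative only. */
    static size_t strlen_words(const char *s) {
        size_t i = 0;
        for (;;) {
            uint32_t w;
            memcpy(&w, s + i, 4);        /* read one aligned word */
            if (has_zero_byte(w))
                break;                   /* the NUL is in this word */
            i += 4;
        }
        while (s[i] != '\0')             /* find the exact byte */
            ++i;
        return i;
    }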
+ ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete b .Lstringcopy_check_src_align .Lstringcopy_complete: diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk index 7b38de1..db4bcc7 100644 --- a/libc/arch-arm/cortex-a9/cortex-a9.mk +++ b/libc/arch-arm/cortex-a9/cortex-a9.mk @@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a9/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ libc_bionic_src_files_arm += \ diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk index 5fddf95..e81f8c7 100644 --- a/libc/arch-arm/denver/denver.mk +++ b/libc/arch-arm/denver/denver.mk @@ -1,4 +1,5 @@ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ arch-arm/denver/bionic/memcpy.S \ arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/generic/bionic/memchr.S b/libc/arch-arm/generic/bionic/memchr.S new file mode 100644 index 0000000..cb00d82 --- /dev/null +++ b/libc/arch-arm/generic/bionic/memchr.S @@ -0,0 +1,155 @@ +/* Copyright (c) 2010-2015, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + Written by Dave Gilbert <david.gilbert@linaro.org> + + This memchr routine is optimised on a Cortex-A9 and should work on + all ARMv7 processors. It has a fast past for short sizes, and has + an optimised path for large data sets; the worst case is finding the + match early in a large data set. 
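The Linaro memchr described in this header applies the same idea to an arbitrary byte: the target character is replicated into every byte lane (orr r1, r1, r1, lsl #8 then lsl #16), each loaded word is XORed with that pattern so matching bytes become zero, and a parallel per-byte test (uadd8 plus sel in the assembly, over two words at a time) flags any zero byte. A portable C sketch of the word-wise scan, using the subtract/mask zero-byte test instead of uadd8/sel and checking one word per iteration (memchr_sketch is a hypothetical name):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static const void *memchr_sketch(const void *src, int c, size_t n) {
        const unsigned char *p = src;
        uint32_t pat = (unsigned char)c;
        pat |= pat << 8;
        pat |= pat << 16;                    /* replicate the byte across the word */

        while (n >= 4) {
            uint32_t w;
            memcpy(&w, p, 4);                /* unaligned-safe 4-byte load */
            uint32_t x = w ^ pat;            /* matching bytes become 0x00 */
            if ((x - 0x01010101u) & ~x & 0x80808080u)
                break;                       /* a match lies in this word */
            p += 4;
            n -= 4;
        }
        for (; n != 0; --n, ++p)             /* short inputs and the final word */
            if (*p == (unsigned char)c)
                return p;
        return NULL;
    }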
+ + */ + +#include <private/bionic_asm.h> + +@ 2011-02-07 david.gilbert@linaro.org +@ Extracted from local git a5b438d861 +@ 2011-07-14 david.gilbert@linaro.org +@ Import endianness fix from local git ea786f1b +@ 2011-12-07 david.gilbert@linaro.org +@ Removed unneeded cbz from align loop + + .syntax unified + .arch armv7-a + +@ this lets us check a flag in a 00/ff byte easily in either endianness +#ifdef __ARMEB__ +#define CHARTSTMASK(c) 1<<(31-(c*8)) +#else +#define CHARTSTMASK(c) 1<<(c*8) +#endif + .text + .thumb + +@ --------------------------------------------------------------------------- + .thumb_func +ENTRY(memchr) + .p2align 4,,15 + @ r0 = start of memory to scan + @ r1 = character to look for + @ r2 = length + @ returns r0 = pointer to character or NULL if not found + and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char + + cmp r2,#16 @ If it's short don't bother with anything clever + blt 20f + + tst r0, #7 @ If it's already aligned skip the next bit + beq 10f + + @ Work up to an aligned point +5: + ldrb r3, [r0],#1 + subs r2, r2, #1 + cmp r3, r1 + beq 50f @ If it matches exit found + tst r0, #7 + bne 5b @ If not aligned yet then do next byte + +10: + @ At this point, we are aligned, we know we have at least 8 bytes to work with + push {r4,r5,r6,r7} + orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes + orr r1, r1, r1, lsl #16 + bic r4, r2, #7 @ Number of double words to work with + mvns r7, #0 @ all F's + movs r3, #0 + +15: + ldrd r5,r6,[r0],#8 + subs r4, r4, #8 + eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target + eor r6,r6, r1 + uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + cbnz r6, 60f + bne 15b @ (Flags from the subs above) If not run out of bytes then go around again + + pop {r4,r5,r6,r7} + and r1,r1,#0xff @ Get r1 back to a single character from the expansion above + and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done + +20: + cbz r2, 40f @ 0 length or hit the end already then not found + +21: @ Post aligned section, or just a short call + ldrb r3,[r0],#1 + subs r2,r2,#1 + eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub + cbz r3, 50f + bne 21b @ on r2 flags + +40: + movs r0,#0 @ not found + bx lr + +50: + subs r0,r0,#1 @ found + bx lr + +60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was + @ r0 points to the start of the double word after the one that was tested + @ r5 has the 00/ff pattern for the first word, r6 has the chained value + cmp r5, #0 + itte eq + moveq r5, r6 @ the end is in the 2nd word + subeq r0,r0,#3 @ Points to 2nd byte of 2nd word + subne r0,r0,#7 @ or 2nd byte of 1st word + + @ r0 currently points to the 3rd byte of the word containing the hit + tst r5, # CHARTSTMASK(0) @ 1st character + bne 61f + adds r0,r0,#1 + tst r5, # CHARTSTMASK(1) @ 2nd character + ittt eq + addeq r0,r0,#1 + tsteq r5, # (3<<15) @ 2nd & 3rd character + @ If not the 3rd must be the last one + addeq r0,r0,#1 + +61: + pop {r4,r5,r6,r7} + subs r0,r0,#1 + bx lr +END(memchr) diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S index c78dbd4..6643d55 100644 --- 
a/libc/arch-arm/generic/bionic/memcmp.S +++ b/libc/arch-arm/generic/bionic/memcmp.S @@ -221,8 +221,7 @@ ENTRY(memcmp) bne 8b 9: /* restore registers and return */ - ldmfd sp!, {r4, lr} - bx lr + ldmfd sp!, {r4, pc} 10: /* process less than 12 bytes */ cmp r2, #0 diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S index ea5a399..65cba4c 100644 --- a/libc/arch-arm/generic/bionic/memcpy.S +++ b/libc/arch-arm/generic/bionic/memcpy.S @@ -194,8 +194,7 @@ ENTRY(memcpy) /* we're done! restore everything and return */ 1: ldmfd sp!, {r5-r11} - ldmfd sp!, {r0, r4, lr} - bx lr + ldmfd sp!, {r0, r4, pc} /********************************************************************/ @@ -385,8 +384,7 @@ ENTRY(memcpy) /* we're done! restore sp and spilled registers and return */ add sp, sp, #28 - ldmfd sp!, {r0, r4, lr} - bx lr + ldmfd sp!, {r0, r4, pc} END(memcpy) // Only reached when the __memcpy_chk check fails. diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S index d17a9c4..b8eabbf 100644 --- a/libc/arch-arm/generic/bionic/memset.S +++ b/libc/arch-arm/generic/bionic/memset.S @@ -82,8 +82,7 @@ ENTRY(memset) strbcs r1, [r0], #1 strbmi r1, [r0], #1 subs r2, r2, r3 - popls {r0, r4-r7, lr} /* return */ - bxls lr + popls {r0, r4-r7, pc} /* return */ /* align the destination to a cache-line */ mov r12, r1 @@ -126,8 +125,7 @@ ENTRY(memset) strhmi r1, [r0], #2 movs r2, r2, lsl #2 strbcs r1, [r0] - ldmfd sp!, {r0, r4-r7, lr} - bx lr + ldmfd sp!, {r0, r4-r7, pc} END(memset) .data diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk index e49d6d2..016c882 100644 --- a/libc/arch-arm/generic/generic.mk +++ b/libc/arch-arm/generic/generic.mk @@ -1,4 +1,5 @@ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ arch-arm/generic/bionic/memcpy.S \ arch-arm/generic/bionic/memset.S \ diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S index 246f159..1a39c5b 100644 --- a/libc/arch-arm/krait/bionic/__strcat_chk.S +++ b/libc/arch-arm/krait/bionic/__strcat_chk.S @@ -40,7 +40,7 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} @@ -177,7 +177,7 @@ ENTRY(__strcat_chk) .L_strlen_done: add r2, r3, r4 cmp r2, lr - bhi __strcat_chk_failed + bhi .L_strcat_chk_failed // Set up the registers for the memcpy code. mov r1, r5 @@ -185,20 +185,17 @@ ENTRY(__strcat_chk) mov r2, r4 add r0, r0, r3 pop {r4, r5} -END(__strcat_chk) + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 -#define MEMCPY_BASE __strcat_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__strcat_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 + // Undo the above cfi directives. 
.cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 - +.L_strcat_chk_failed: ldr r0, error_message ldr r1, error_code 1: @@ -208,7 +205,7 @@ error_code: .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__strcat_chk_failed) +END(__strcat_chk) .data error_string: diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S index db76686..00202f3 100644 --- a/libc/arch-arm/krait/bionic/__strcpy_chk.S +++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S @@ -39,7 +39,7 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -149,21 +149,14 @@ ENTRY(__strcpy_chk) pld [r1, #64] ldr r0, [sp] cmp r3, lr - bhs __strcpy_chk_failed + bhs .L_strcpy_chk_failed // Add 1 for copy length to get the string terminator. add r2, r3, #1 -END(__strcpy_chk) -#define MEMCPY_BASE __strcpy_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__strcpy_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - +.L_strcpy_chk_failed: ldr r0, error_message ldr r1, error_code 1: @@ -173,7 +166,7 @@ error_code: .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__strcpy_chk_failed) +END(__strcpy_chk) .data error_string: diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S index 9ff46a8..5d27b57 100644 --- a/libc/arch-arm/krait/bionic/memcpy.S +++ b/libc/arch-arm/krait/bionic/memcpy.S @@ -45,7 +45,7 @@ ENTRY(__memcpy_chk) cmp r2, r3 - bhi __memcpy_chk_fail + bhi .L_memcpy_chk_fail // Fall through to memcpy... END(__memcpy_chk) @@ -53,19 +53,20 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #64] stmfd sp!, {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 -END(memcpy) -#define MEMCPY_BASE __memcpy_base -#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__memcpy_chk_fail) + // Undo the cfi directives from above. + .cfi_adjust_cfa_offset -8 + .cfi_restore r0 + .cfi_restore lr +.L_memcpy_chk_fail: // Preserve lr for backtrace. push {lr} - .cfi_def_cfa_offset 4 + .cfi_adjust_cfa_offset 4 .cfi_rel_offset lr, 0 ldr r0, error_message @@ -77,7 +78,7 @@ error_code: .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__memcpy_chk_fail) +END(memcpy) .data error_string: diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S index 035dcf1..76c5a84 100644 --- a/libc/arch-arm/krait/bionic/memcpy_base.S +++ b/libc/arch-arm/krait/bionic/memcpy_base.S @@ -1,123 +1,191 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -/* - * This code assumes it is running on a processor that supports all arm v7 - * instructions, that supports neon instructions, and that has a 32 byte - * cache line. - */ - -// Assumes neon instructions and a cache line size of 32 bytes. - -ENTRY_PRIVATE(MEMCPY_BASE) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - /* do we have at least 16-bytes to copy (needed for alignment below) */ - cmp r2, #16 - blo 5f - - /* align destination to cache-line for the write-buffer */ - rsb r3, r0, #0 - ands r3, r3, #0xF - beq 2f - - /* copy up to 15-bytes (count in r3) */ - sub r2, r2, r3 - movs ip, r3, lsl #31 - itt mi - ldrbmi lr, [r1], #1 - strbmi lr, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1], #1 - strbcs ip, [r0], #1 - strbcs lr, [r0], #1 - movs ip, r3, lsl #29 - bge 1f - // copies 4 bytes, destination 32-bits aligned - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! -1: bcc 2f - // copies 8 bytes, destination 64-bits aligned - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0, :64]! - -2: /* make sure we have at least 64 bytes to copy */ - subs r2, r2, #64 - blo 2f - -1: /* The main loop copies 64 bytes at a time */ - vld1.8 {d0 - d3}, [r1]! - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(32*8)] - subs r2, r2, #64 - vst1.8 {d0 - d3}, [r0, :128]! - vst1.8 {d4 - d7}, [r0, :128]! - bhs 1b - -2: /* fix-up the remaining count and make sure we have >= 32 bytes left */ - adds r2, r2, #32 - blo 4f - - /* Copy 32 bytes. These cache lines were already preloaded */ - vld1.8 {d0 - d3}, [r1]! - sub r2, r2, #32 - vst1.8 {d0 - d3}, [r0, :128]! - -4: /* less than 32 left */ - add r2, r2, #32 - tst r2, #0x10 - beq 5f - // copies 16 bytes, 128-bits aligned - vld1.8 {d0, d1}, [r1]! - vst1.8 {d0, d1}, [r0, :128]! - -5: /* copy up to 15-bytes (count in r2) */ - movs ip, r2, lsl #29 - bcc 1f - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0]! -1: bge 2f - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! -2: movs ip, r2, lsl #31 - itt mi - ldrbmi r3, [r1], #1 - strbmi r3, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1], #1 - strbcs ip, [r0], #1 - strbcs lr, [r0], #1 - - ldmfd sp!, {r0, lr} - bx lr -END(MEMCPY_BASE) +/*************************************************************************** + Copyright (c) 2009-2013 The Linux Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of The Linux Foundation nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/* Assumes neon instructions and a cache line size of 64 bytes. */ + +#include <machine/cpu-features.h> +#include <machine/asm.h> + +#define PLDOFFS (10) +#define PLDTHRESH (PLDOFFS) +#define BBTHRESH (4096/64) +#define PLDSIZE (64) + +#if (PLDOFFS < 1) +#error Routine does not support offsets less than 1 +#endif + +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif + + .text + .fpu neon + +.L_memcpy_base: + cmp r2, #4 + blt .L_neon_lt4 + cmp r2, #16 + blt .L_neon_lt16 + cmp r2, #32 + blt .L_neon_16 + cmp r2, #64 + blt .L_neon_copy_32_a + + mov r12, r2, lsr #6 + cmp r12, #PLDTHRESH + ble .L_neon_copy_64_loop_nopld + + push {r9, r10} + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r9, 0 + .cfi_rel_offset r10, 4 + + cmp r12, #BBTHRESH + ble .L_neon_prime_pump + + add lr, r0, #0x400 + add r9, r1, #(PLDOFFS*PLDSIZE) + sub lr, lr, r9 + lsl lr, lr, #21 + lsr lr, lr, #21 + add lr, lr, #(PLDOFFS*PLDSIZE) + cmp r12, lr, lsr #6 + ble .L_neon_prime_pump + + itt gt + movgt r9, #(PLDOFFS) + rsbsgt r9, r9, lr, lsr #6 + ble .L_neon_prime_pump + + add r10, r1, lr + bic r10, #0x3F + + sub r12, r12, lr, lsr #6 + + cmp r9, r12 + itee le + suble r12, r12, r9 + movgt r9, r12 + movgt r12, #0 + + pld [r1, #((PLDOFFS-1)*PLDSIZE)] +.L_neon_copy_64_loop_outer_doublepld: + pld [r1, #((PLDOFFS)*PLDSIZE)] + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r9, r9, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .L_neon_copy_64_loop_outer_doublepld + cmp r12, #0 + beq .L_neon_pop_before_nopld + + cmp r12, #(512*1024/64) + blt .L_neon_copy_64_loop_outer + +.L_neon_copy_64_loop_ddr: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + pld [r10] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .L_neon_copy_64_loop_ddr + b .L_neon_pop_before_nopld + +.L_neon_prime_pump: + mov lr, #(PLDOFFS*PLDSIZE) + add r10, r1, #(PLDOFFS*PLDSIZE) + bic r10, #0x3F + sub r12, r12, #PLDOFFS + ldr r3, [r10, #(-1*PLDSIZE)] + +.L_neon_copy_64_loop_outer: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! 
+ add r10, #64 + bne .L_neon_copy_64_loop_outer + +.L_neon_pop_before_nopld: + mov r12, lr, lsr #6 + pop {r9, r10} + .cfi_adjust_cfa_offset -8 + .cfi_restore r9 + .cfi_restore r10 + +.L_neon_copy_64_loop_nopld: + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + bne .L_neon_copy_64_loop_nopld + ands r2, r2, #0x3f + beq .L_neon_exit + +.L_neon_copy_32_a: + movs r3, r2, lsl #27 + bcc .L_neon_16 + vld1.32 {q0,q1}, [r1]! + vst1.32 {q0,q1}, [r0]! + +.L_neon_16: + bpl .L_neon_lt16 + vld1.32 {q8}, [r1]! + vst1.32 {q8}, [r0]! + ands r2, r2, #0x0f + beq .L_neon_exit + +.L_neon_lt16: + movs r3, r2, lsl #29 + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: + bge .L_neon_lt4 + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! + +.L_neon_lt4: + movs r2, r2, lsl #31 + itt cs + ldrhcs r3, [r1], #2 + strhcs r3, [r0], #2 + itt mi + ldrbmi r3, [r1] + strbmi r3, [r0] + +.L_neon_exit: + pop {r0, pc} diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S new file mode 100644 index 0000000..aea7315 --- /dev/null +++ b/libc/arch-arm/krait/bionic/memmove.S @@ -0,0 +1,219 @@ +/*************************************************************************** + Copyright (c) 2009-2014 The Linux Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of The Linux Foundation nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
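The krait memmove introduced by this file only takes its back-to-front ("b2f") path when the destination starts inside the source buffer, i.e. src < dst < src + n; in every other case it branches straight to memcpy, and moves of four bytes or fewer go through a small trailing-copy block. Roughly, in C (memmove_sketch is a hypothetical stand-in; the real backward path is further split into 64/32/8/1-byte stages using NEON loads and stores):

    #include <stddef.h>
    #include <stdint.h>

    /* Dispatch logic only; the assembly does the same range test with
     *   cmp r0, r1 ... addge r12, r1, r2 ... cmpge r12, r0 ... ble memcpy   */
    void *memmove_sketch(void *dst, const void *src, size_t n) {
        unsigned char *d = dst;
        const unsigned char *s = src;
        uintptr_t di = (uintptr_t)d, si = (uintptr_t)s;

        if (n == 0 || di == si)
            return dst;

        if (di > si && di < si + n) {
            /* dst begins inside src: copy from the tail backwards so every
             * byte is read before it is overwritten (the "b2f" path). */
            while (n--)
                d[n] = s[n];
        } else {
            /* No harmful overlap; a plain forward copy (memcpy in the
             * assembly) is safe.  Done by hand here to keep the sketch
             * self-contained. */
            for (size_t i = 0; i < n; ++i)
                d[i] = s[i];
        }
        return dst;
    }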
+ ***************************************************************************/ + +/*************************************************************************** + * Neon memmove: Attempts to do a memmove with Neon registers if possible, + * Inputs: + * dest: The destination buffer + * src: The source buffer + * n: The size of the buffer to transfer + * Outputs: + * + ***************************************************************************/ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> +/* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true + * TARGET_USE_KRAIT_PLD_SET := true + * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset> + * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize> + * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold> + */ +#ifndef PLDOFFS +#define PLDOFFS (10) +#endif +#ifndef PLDTHRESH +#define PLDTHRESH (PLDOFFS) +#endif +#if (PLDOFFS < 5) +#error Routine does not support offsets less than 5 +#endif +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif +#ifndef PLDSIZE +#define PLDSIZE (64) +#endif + + .text + .syntax unified + .fpu neon + .thumb + .thumb_func + +//ENTRY(bcopy) +// //.cfi_startproc +// mov r12, r0 +// mov r0, r1 +// mov r1, r12 +// // Fall through to memmove +// //.cfi_endproc +//END(bcopy) + +ENTRY(memmove) +_memmove_words: + //.cfi_startproc + .save {r0, lr} + cmp r2, #0 + it ne + subsne r12, r0, r1 // Warning: do not combine these "it" blocks + it eq + bxeq lr +// memmove only if r1 < r0 < r1+r2 + cmp r0, r1 + itt ge + addge r12, r1, r2 + cmpge r12, r0 + it le + ble memcpy + cmp r2, #4 + it le + ble .Lneon_b2f_smallcopy_loop + push {r0, lr} + add r0, r0, r2 + add r1, r1, r2 + cmp r2, #64 + it ge + bge .Lneon_b2f_copy_64 + cmp r2, #32 + it ge + bge .Lneon_b2f_copy_32 + cmp r2, #8 + it ge + bge .Lneon_b2f_copy_8 + b .Lneon_b2f_copy_1 +.Lneon_b2f_copy_64: + mov r12, r2, lsr #6 + add r0, r0, #32 + add r1, r1, #32 + cmp r12, #PLDTHRESH + it le + ble .Lneon_b2f_copy_64_loop_nopld + sub r12, #PLDOFFS + sub lr, r1, #(PLDOFFS)*PLDSIZE +.Lneon_b2f_copy_64_loop_outer: + pld [lr] + sub r1, r1, #96 + sub r0, r0, #96 + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1] + sub lr, lr, #64 + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0] + it ne + bne .Lneon_b2f_copy_64_loop_outer + mov r12, #PLDOFFS +.Lneon_b2f_copy_64_loop_nopld: + sub r1, r1, #96 + sub r0, r0, #96 + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1] + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! 
+ vst1.32 {q10, q11}, [r0] + it ne + bne .Lneon_b2f_copy_64_loop_nopld + ands r2, r2, #0x3f + it eq + beq .Lneon_memmove_done + sub r1, r1, #32 + sub r0, r0, #32 + cmp r2, #32 + it lt + blt .Lneon_b2f_copy_8 +.Lneon_b2f_copy_32: + sub r1, r1, #32 + sub r0, r0, #32 + vld1.32 {q0, q1}, [r1] + vst1.32 {q0, q1}, [r0] + ands r2, r2, #0x1f + it eq + beq .Lneon_memmove_done +.Lneon_b2f_copy_8: + movs r12, r2, lsr #0x3 + it eq + beq .Lneon_b2f_copy_1 +.Lneon_b2f_copy_8_loop: + sub r1, r1, #8 + sub r0, r0, #8 + vld1.32 {d0}, [r1] + subs r12, r12, #1 + vst1.32 {d0}, [r0] + it ne + bne .Lneon_b2f_copy_8_loop + ands r2, r2, #0x7 + beq .Lneon_memmove_done +.Lneon_b2f_copy_1: + movs r12, r2, lsl #29 + itttt mi + submi r1, r1, #4 + submi r0, r0, #4 + ldrmi r3, [r1] + strmi r3, [r0] + movs r2, r2, lsl #31 + itttt cs + subcs r1, r1, #2 + subcs r0, r0, #2 + ldrhcs r3, [r1] + strhcs r3, [r0] + itttt mi + submi r1, r1, #1 + submi r0, r0, #1 + ldrbmi r12, [r1] + strbmi r12, [r0] +.Lneon_memmove_done: + pop {r0, pc} +.Lneon_b2f_smallcopy_loop: + // 4 bytes or less + add r1, r1, r2 + add r0, r0, r2 + movs r12, r2, lsl #29 + itttt mi + submi r1, r1, #4 + submi r0, r0, #4 + ldrmi r3, [r1] + strmi r3, [r0] + movs r2, r2, lsl #31 + itttt cs + subcs r1, r1, #2 + subcs r0, r0, #2 + ldrhcs r3, [r1] + strhcs r3, [r0] + itttt mi + submi r1, r1, #1 + submi r0, r0, #1 + ldrbmi r12, [r1] + strbmi r12, [r0] + bx lr +// .cfi_endproc +END(memmove) + diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S index a4fbe17..ae05965 100644 --- a/libc/arch-arm/krait/bionic/memset.S +++ b/libc/arch-arm/krait/bionic/memset.S @@ -69,10 +69,7 @@ END(bzero) /* memset() returns its first argument. */ ENTRY(memset) - stmfd sp!, {r0} - .cfi_def_cfa_offset 4 - .cfi_rel_offset r0, 0 - + mov r3, r0 vdup.8 q0, r1 /* make sure we have at least 32 bytes to write */ @@ -82,7 +79,7 @@ ENTRY(memset) 1: /* The main loop writes 32 bytes at a time */ subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! + vst1.8 {d0 - d3}, [r3]! bhs 1b 2: /* less than 32 left */ @@ -91,18 +88,17 @@ ENTRY(memset) beq 3f // writes 16 bytes, 128-bits aligned - vst1.8 {d0, d1}, [r0]! + vst1.8 {d0, d1}, [r3]! 3: /* write up to 15-bytes (count in r2) */ movs ip, r2, lsl #29 bcc 1f - vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r3]! 1: bge 2f - vst1.32 {d0[0]}, [r0]! + vst1.32 {d0[0]}, [r3]! 2: movs ip, r2, lsl #31 - strbmi r1, [r0], #1 - strbcs r1, [r0], #1 - strbcs r1, [r0], #1 - ldmfd sp!, {r0} + strbmi r1, [r3], #1 + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 bx lr END(memset) diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk index 88b4d66..5f5b414 100644 --- a/libc/arch-arm/krait/krait.mk +++ b/libc/arch-arm/krait/krait.mk @@ -1,9 +1,19 @@ libc_bionic_src_files_arm += \ - arch-arm/krait/bionic/memcpy.S \ arch-arm/krait/bionic/memset.S \ arch-arm/krait/bionic/strcmp.S \ arch-arm/krait/bionic/__strcat_chk.S \ arch-arm/krait/bionic/__strcpy_chk.S \ + arch-arm/krait/bionic/memmove.S + +#For some targets we don't need this optimization. +#Corresponding flag is defined in device specific folder. 
+ifeq ($(TARGET_CPU_MEMCPY_BASE_OPT_DISABLE),true) +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S +else +libc_bionic_src_files_arm += \ + arch-arm/krait/bionic/memcpy.S +endif # Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove libc_bionic_src_files_arm += \ @@ -13,7 +23,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a15/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ -libc_bionic_src_files_arm += \ - arch-arm/denver/bionic/memmove.S \ + diff --git a/libc/arch-arm/scorpion/scorpion.mk b/libc/arch-arm/scorpion/scorpion.mk new file mode 100644 index 0000000..ce18a7e --- /dev/null +++ b/libc/arch-arm/scorpion/scorpion.mk @@ -0,0 +1,18 @@ +# Use krait versions of memset/strcmp/memmove +libc_bionic_src_files_arm += \ + arch-arm/krait/bionic/memset.S \ + arch-arm/krait/bionic/strcmp.S \ + arch-arm/krait/bionic/memmove.S + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk index 470a038..1b8d534 100644 --- a/libc/arch-arm64/arm64.mk +++ b/libc/arch-arm64/arm64.mk @@ -8,7 +8,6 @@ libc_bionic_src_files_arm64 += \ bionic/__memset_chk.cpp \ bionic/__strcpy_chk.cpp \ bionic/__strcat_chk.cpp \ - bionic/strrchr.cpp \ libc_freebsd_src_files_arm64 += \ upstream-freebsd/lib/libc/string/wcscat.c \ diff --git a/libc/arch-arm64/denver64/bionic/memmove.S b/libc/arch-arm64/denver64/bionic/memmove.S new file mode 100644 index 0000000..739ce49 --- /dev/null +++ b/libc/arch-arm64/denver64/bionic/memmove.S @@ -0,0 +1,329 @@ +/* Copyright (c) 2014, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
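The AArch64 memmove added below is written so the same body can also be assembled as wmemmove: with WMEMMOVE defined, the entry point only scales the element count by four ("lsl count, count, #2", relying on the file's stated assumption that wchar_t is 4 bytes) and then runs the ordinary byte-count code. In C terms, as a minimal hedged sketch (wmemmove_sketch is a hypothetical name):

    #include <stddef.h>
    #include <string.h>
    #include <wchar.h>

    /* Scale the element count to a byte count, then defer to memmove. */
    wchar_t *wmemmove_sketch(wchar_t *dst, const wchar_t *src, size_t n) {
        memmove(dst, src, n * sizeof(wchar_t));
        return dst;
    }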
+*/ + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Unaligned accesses + * wchar_t is 4 bytes + */ + +#include <private/bionic_asm.h> + +/* Parameters and result. */ +#define dstin x0 +#define src x1 +#define count x2 +#define tmp1 x3 +#define tmp1w w3 +#define tmp2 x4 +#define tmp2w w4 +#define tmp3 x5 +#define tmp3w w5 +#define dst x6 + +#define A_l x7 +#define A_h x8 +#define B_l x9 +#define B_h x10 +#define C_l x11 +#define C_h x12 +#define D_l x13 +#define D_h x14 + +#if defined(WMEMMOVE) +ENTRY(wmemmove) + lsl count, count, #2 +#else +ENTRY(memmove) +#endif + cmp dstin, src + b.lo .Ldownwards + add tmp1, src, count + cmp dstin, tmp1 + b.hs memcpy /* No overlap. */ + + /* Upwards move with potential overlap. + * Need to move from the tail backwards. SRC and DST point one + * byte beyond the remaining data to move. */ + add dst, dstin, count + add src, src, count + cmp count, #64 + b.ge .Lmov_not_short_up + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63up: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15up + sub dst, dst, tmp1 + sub src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #32] + stp A_l, A_h, [dst, #32] +1: + ldp A_l, A_h, [src, #16] + stp A_l, A_h, [dst, #16] +2: + ldp A_l, A_h, [src] + stp A_l, A_h, [dst] +.Ltail15up: + /* Move up to 15 bytes of data. Does not assume additional data + * being moved. */ + tbz count, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz count, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz count, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz count, #0, 1f + ldrb tmp1w, [src, #-1] + strb tmp1w, [dst, #-1] +1: + ret + +.Lmov_not_short_up: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + ands tmp2, src, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src, #-1]! + strb tmp1w, [dst, #-1]! +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63up +2: + subs count, count, #128 + b.ge .Lmov_body_large_up + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src, #-64]! + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst, #-64]! + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + b.ne .Ltail63up + ret + + /* Critical loop. Start at a new Icache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_up: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #-16] + ldp B_l, B_h, [src, #-32] + ldp C_l, C_h, [src, #-48] + ldp D_l, D_h, [src, #-64]! 
+1: + stp A_l, A_h, [dst, #-16] + ldp A_l, A_h, [src, #-16] + stp B_l, B_h, [dst, #-32] + ldp B_l, B_h, [src, #-32] + stp C_l, C_h, [dst, #-48] + ldp C_l, C_h, [src, #-48] + stp D_l, D_h, [dst, #-64]! + ldp D_l, D_h, [src, #-64]! + subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #-16] + stp B_l, B_h, [dst, #-32] + stp C_l, C_h, [dst, #-48] + stp D_l, D_h, [dst, #-64]! + tst count, #0x3f + b.ne .Ltail63up + ret + + +.Ldownwards: + /* For a downwards move we can safely use memcpy provided that + * DST is more than 16 bytes away from SRC. */ + sub tmp1, src, #16 + cmp dstin, tmp1 + b.ls memcpy /* May overlap, but not critically. */ + + mov dst, dstin /* Preserve DSTIN for return value. */ + cmp count, #64 + b.ge .Lmov_not_short_down + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63down: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15down + add dst, dst, tmp1 + add src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #-48] + stp A_l, A_h, [dst, #-48] +1: + ldp A_l, A_h, [src, #-32] + stp A_l, A_h, [dst, #-32] +2: + ldp A_l, A_h, [src, #-16] + stp A_l, A_h, [dst, #-16] +.Ltail15down: + /* Move up to 15 bytes of data. Does not assume additional data + being moved. */ + tbz count, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz count, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz count, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz count, #0, 1f + ldrb tmp1w, [src] + strb tmp1w, [dst] +1: + ret + +.Lmov_not_short_down: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + neg tmp2, src + ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src], #1 + strb tmp1w, [dst], #1 +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63down +2: + subs count, count, #128 + b.ge .Lmov_body_large_down + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst] + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + add src, src, #64 + add dst, dst, #64 + b.ne .Ltail63down + ret + + /* Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_down: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #0] + sub dst, dst, #16 /* Pre-bias. */ + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ +1: + stp A_l, A_h, [dst, #16] + ldp A_l, A_h, [src, #16] + stp B_l, B_h, [dst, #32] + ldp B_l, B_h, [src, #32] + stp C_l, C_h, [dst, #48] + ldp C_l, C_h, [src, #48] + stp D_l, D_h, [dst, #64]! + ldp D_l, D_h, [src, #64]! 
+ subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #16] + stp B_l, B_h, [dst, #32] + stp C_l, C_h, [dst, #48] + stp D_l, D_h, [dst, #64] + add src, src, #16 + add dst, dst, #64 + 16 + tst count, #0x3f + b.ne .Ltail63down + ret +#if defined(WMEMMOVE) +END(wmemmove) +#else +END(memmove) +#endif diff --git a/libc/arch-arm64/denver64/denver64.mk b/libc/arch-arm64/denver64/denver64.mk index d619c11..6b1e1ff 100644 --- a/libc/arch-arm64/denver64/denver64.mk +++ b/libc/arch-arm64/denver64/denver64.mk @@ -2,7 +2,7 @@ libc_bionic_src_files_arm64 += \ arch-arm64/generic/bionic/memchr.S \ arch-arm64/generic/bionic/memcmp.S \ arch-arm64/denver64/bionic/memcpy.S \ - arch-arm64/generic/bionic/memmove.S \ + arch-arm64/denver64/bionic/memmove.S \ arch-arm64/denver64/bionic/memset.S \ arch-arm64/generic/bionic/stpcpy.S \ arch-arm64/generic/bionic/strchr.S \ @@ -11,4 +11,5 @@ libc_bionic_src_files_arm64 += \ arch-arm64/generic/bionic/strlen.S \ arch-arm64/generic/bionic/strncmp.S \ arch-arm64/generic/bionic/strnlen.S \ + arch-arm64/generic/bionic/strrchr.S \ arch-arm64/generic/bionic/wmemmove.S diff --git a/libc/arch-arm64/generic/bionic/memcpy_base.S b/libc/arch-arm64/generic/bionic/memcpy_base.S index c5d42ce..f850624 100644 --- a/libc/arch-arm64/generic/bionic/memcpy_base.S +++ b/libc/arch-arm64/generic/bionic/memcpy_base.S @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, Linaro Limited +/* Copyright (c) 2012-2013, Linaro Limited All rights reserved. Redistribution and use in source and binary forms, with or without @@ -22,158 +22,196 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ /* Assumptions: * - * ARMv8-a, AArch64 - * Unaligned accesses + * ARMv8-a, AArch64, unaligned accesses. 
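The memmove.S added above picks a copy direction from the relative position of the arguments: overlapping moves with the destination above the source walk backwards from the tail (.Ltail63up and .Lmov_body_large_up), while everything else is either handed to memcpy or copied forwards. A minimal C sketch of that dispatch, with byte-at-a-time copies standing in for the ldp/stp pairs (illustrative only, not the bionic code):

#include <stddef.h>
#include <stdint.h>

static void *memmove_sketch(void *dst, const void *src, size_t n) {
    unsigned char *d = (unsigned char *)dst;
    const unsigned char *s = (const unsigned char *)src;
    // Forward copy is safe when dst is below src or the ranges are disjoint;
    // the assembly additionally keeps a 16-byte margin before trusting memcpy.
    if ((uintptr_t)d < (uintptr_t)s || (uintptr_t)d >= (uintptr_t)s + n) {
        for (size_t i = 0; i < n; ++i)
            d[i] = s[i];
    } else {
        // dst overlaps the tail of src: copy from the end so no source byte
        // is overwritten before it has been read.
        for (size_t i = n; i-- > 0; )
            d[i] = s[i];
    }
    return dst;
}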
* */ +#include <private/bionic_asm.h> + #define dstin x0 #define src x1 #define count x2 -#define tmp1 x3 -#define tmp1w w3 -#define tmp2 x4 -#define tmp2w w4 -#define tmp3 x5 -#define tmp3w w5 -#define dst x6 - -#define A_l x7 -#define A_h x8 -#define B_l x9 -#define B_h x10 -#define C_l x11 -#define C_h x12 -#define D_l x13 -#define D_h x14 - - mov dst, dstin - cmp count, #64 - b.ge .Lcpy_not_short - cmp count, #15 - b.le .Ltail15tiny - - /* Deal with small copies quickly by dropping straight into the - * exit block. */ -.Ltail63: - /* Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. */ - ands tmp1, count, #0x30 - b.eq .Ltail15 - add dst, dst, tmp1 - add src, src, tmp1 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src, #-48] - stp A_l, A_h, [dst, #-48] -1: - ldp A_l, A_h, [src, #-32] - stp A_l, A_h, [dst, #-32] -2: - ldp A_l, A_h, [src, #-16] - stp A_l, A_h, [dst, #-16] - -.Ltail15: - ands count, count, #15 - beq 1f - add src, src, count - ldp A_l, A_h, [src, #-16] - add dst, dst, count - stp A_l, A_h, [dst, #-16] +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define A_hw w7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l src +#define E_h count +#define F_l srcend +#define F_h dst +#define tmp1 x9 + +#define L(l) .L ## l + +/* Copies are split into 3 main cases: small copies of up to 16 bytes, + medium copies of 17..96 bytes which are fully unrolled. Large copies + of more than 96 bytes align the destination and use an unrolled loop + processing 64 bytes per iteration. + Small and medium copies read all data before writing, allowing any + kind of overlap, and memmove tailcalls memcpy for these cases as + well as non-overlapping copies. +*/ + + prfm PLDL1KEEP, [src] + add srcend, src, count + add dstend, dstin, count + cmp count, 16 + b.ls L(copy16) + cmp count, 96 + b.hi L(copy_long) + + /* Medium copies: 17..96 bytes. */ + sub tmp1, count, 1 + ldp A_l, A_h, [src] + tbnz tmp1, 6, L(copy96) + ldp D_l, D_h, [srcend, -16] + tbz tmp1, 5, 1f + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] 1: + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] ret -.Ltail15tiny: - /* Copy up to 15 bytes of data. Does not assume additional data - being copied. */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 1f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -1: - tbz count, #1, 1f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -1: - tbz count, #0, 1f - ldrb tmp1w, [src] - strb tmp1w, [dst] + .p2align 4 + + /* Small copies: 0..16 bytes. */ +L(copy16): + cmp count, 8 + b.lo 1f + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + .p2align 4 1: + tbz count, 2, 1f + ldr A_lw, [src] + ldr A_hw, [srcend, -4] + str A_lw, [dstin] + str A_hw, [dstend, -4] ret -.Lcpy_not_short: - /* We don't much care about the alignment of DST, but we want SRC - * to be 128-bit (16 byte) aligned so that we don't cross cache line - * boundaries on both loads and stores. */ - neg tmp2, src - ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ - b.eq 2f - sub count, count, tmp2 - /* Copy more data than needed; it's faster than jumping - * around copying sub-Quadword quantities. We know that - * it can't overrun. 
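The new memcpy above splits copies into three cases (0..16 bytes, 17..96 bytes, more than 96 bytes) and, for the first two, loads all data before storing anything, which is why overlapping buffers also work and memmove can tail-call it. A hedged C sketch of the 0..16-byte case, including the branchless 0..3-byte tail described in the comment (the function name is illustrative):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Copy up to 16 bytes by reading both ends first and then writing both ends,
// so the two stores overlap instead of needing an exact-length loop.
static void copy16_sketch(unsigned char *dst, const unsigned char *src, size_t n) {
    if (n >= 8) {
        uint64_t a, b;
        memcpy(&a, src, 8);              // first 8 bytes
        memcpy(&b, src + n - 8, 8);      // last 8 bytes, may overlap the first
        memcpy(dst, &a, 8);
        memcpy(dst + n - 8, &b, 8);
    } else if (n >= 4) {
        uint32_t a, b;
        memcpy(&a, src, 4);
        memcpy(&b, src + n - 4, 4);
        memcpy(dst, &a, 4);
        memcpy(dst + n - 4, &b, 4);
    } else if (n > 0) {
        // Branchless 0..3 tail: the same byte is copied three times when
        // n == 1, and the middle byte twice when n == 2, as the comment notes.
        unsigned char first = src[0];
        unsigned char mid   = src[n >> 1];
        unsigned char last  = src[n - 1];
        dst[0]      = first;
        dst[n >> 1] = mid;
        dst[n - 1]  = last;
    }
}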
*/ - ldp A_l, A_h, [src] - add src, src, tmp2 - stp A_l, A_h, [dst] - add dst, dst, tmp2 - /* There may be less than 63 bytes to go now. */ - cmp count, #63 - b.le .Ltail63 -2: - subs count, count, #128 - b.ge .Lcpy_body_large - /* Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. */ - ldp A_l, A_h, [src] - ldp B_l, B_h, [src, #16] - ldp C_l, C_h, [src, #32] - ldp D_l, D_h, [src, #48] - stp A_l, A_h, [dst] - stp B_l, B_h, [dst, #16] - stp C_l, C_h, [dst, #32] - stp D_l, D_h, [dst, #48] - tst count, #0x3f - add src, src, #64 - add dst, dst, #64 - b.ne .Ltail63 + /* Copy 0..3 bytes. Use a branchless sequence that copies the same + byte 3 times if count==1, or the 2nd byte twice if count==2. */ +1: + cbz count, 2f + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb A_hw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb A_hw, [dstend, -1] +2: ret + + .p2align 4 + /* Copy 64..96 bytes. Copy 64 bytes from the start and + 32 bytes from the end. */ +L(copy96): + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [src, 32] + ldp D_l, D_h, [src, 48] + ldp E_l, E_h, [srcend, -32] + ldp F_l, F_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin, 32] + stp D_l, D_h, [dstin, 48] + stp E_l, E_h, [dstend, -32] + stp F_l, F_h, [dstend, -16] ret - /* Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. */ - .p2align 6 -.Lcpy_body_large: - /* There are at least 128 bytes to copy. */ - ldp A_l, A_h, [src, #0] - sub dst, dst, #16 /* Pre-bias. */ - ldp B_l, B_h, [src, #16] - ldp C_l, C_h, [src, #32] - ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ + /* Align DST to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + .p2align 4 +L(copy_long): + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls 2f 1: - stp A_l, A_h, [dst, #16] - ldp A_l, A_h, [src, #16] - stp B_l, B_h, [dst, #32] - ldp B_l, B_h, [src, #32] - stp C_l, C_h, [dst, #48] - ldp C_l, C_h, [src, #48] - stp D_l, D_h, [dst, #64]! - ldp D_l, D_h, [src, #64]! - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst, #16] - stp B_l, B_h, [dst, #32] - stp C_l, C_h, [dst, #48] - stp D_l, D_h, [dst, #64] - add src, src, #16 - add dst, dst, #64 + 16 - tst count, #0x3f - b.ne .Ltail63 + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the end even if + there is just 1 byte left. 
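The L(copy_long) path above aligns the destination, streams 64 bytes per iteration, and then, as the comment says, always writes the final 64 bytes measured from the end of the buffer, so no exact remainder handling is needed. A rough C sketch of that structure, assuming non-overlapping buffers and a count greater than 96, with library memcpy standing in for the ldp/stp quadruples:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void copy_long_sketch(unsigned char *dst, const unsigned char *src, size_t n) {
    // Assumes n > 96 so the head, loop, and tail never underflow.
    unsigned char *dstend = dst + n;
    const unsigned char *srcend = src + n;

    memcpy(dst, src, 16);                        // one unaligned 16-byte head
    size_t skew = 16 - ((uintptr_t)dst & 15);    // advance until dst is 16-aligned
    dst += skew; src += skew; n -= skew;

    while (n > 64) {                             // aligned 64-byte main loop
        memcpy(dst, src, 64);
        dst += 64; src += 64; n -= 64;
    }
    memcpy(dstend - 64, srcend - 64, 64);        // tail: always the last 64 bytes
}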
*/ +2: + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] ret diff --git a/libc/arch-arm64/generic/bionic/memmove.S b/libc/arch-arm64/generic/bionic/memmove.S index 8b366a3..c50112d 100644 --- a/libc/arch-arm64/generic/bionic/memmove.S +++ b/libc/arch-arm64/generic/bionic/memmove.S @@ -1,4 +1,4 @@ -/* Copyright (c) 2014, Linaro Limited +/* Copyright (c) 2013, Linaro Limited All rights reserved. Redistribution and use in source and binary forms, with or without @@ -22,319 +22,131 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ /* Assumptions: * - * ARMv8-a, AArch64 - * Unaligned accesses - * wchar_t is 4 bytes + * ARMv8-a, AArch64, unaligned accesses, wchar_t is 4 bytes */ #include <private/bionic_asm.h> /* Parameters and result. */ -#ifdef BCOPY -#define origdstin x1 -#define origsrc x0 -#endif #define dstin x0 #define src x1 #define count x2 -#define tmp1 x3 -#define tmp1w w3 -#define tmp2 x4 -#define tmp2w w4 -#define tmp3 x5 -#define tmp3w w5 -#define dst x6 - -#define A_l x7 -#define A_h x8 -#define B_l x9 -#define B_h x10 -#define C_l x11 -#define C_h x12 -#define D_l x13 -#define D_h x14 +#define srcend x3 +#define dstend x4 +#define tmp1 x5 +#define A_l x6 +#define A_h x7 +#define B_l x8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l count +#define E_h tmp1 + +/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps. + Larger backwards copies are also handled by memcpy. 
The only remaining + case is forward large copies. The destination is aligned, and an + unrolled loop processes 64 bytes per iteration. +*/ -#ifdef BCOPY -ENTRY(bcopy) - /* Swap src and dst so that a branch to memcpy doesn't cause issues. */ - mov tmp1, origsrc - mov origsrc, origdstin - mov origdstin, tmp1 -#elif defined(WMEMMOVE) +#if defined(WMEMMOVE) ENTRY(wmemmove) lsl count, count, #2 #else ENTRY(memmove) #endif - cmp dstin, src - b.lo .Ldownwards - add tmp1, src, count - cmp dstin, tmp1 - b.hs memcpy /* No overlap. */ - - /* Upwards move with potential overlap. - * Need to move from the tail backwards. SRC and DST point one - * byte beyond the remaining data to move. */ - add dst, dstin, count - add src, src, count - cmp count, #64 - b.ge .Lmov_not_short_up - - /* Deal with small moves quickly by dropping straight into the - * exit block. */ -.Ltail63up: - /* Move up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. */ - ands tmp1, count, #0x30 - b.eq .Ltail15up - sub dst, dst, tmp1 - sub src, src, tmp1 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src, #32] - stp A_l, A_h, [dst, #32] -1: - ldp A_l, A_h, [src, #16] - stp A_l, A_h, [dst, #16] -2: - ldp A_l, A_h, [src] - stp A_l, A_h, [dst] -.Ltail15up: - /* Move up to 15 bytes of data. Does not assume additional data - * being moved. */ - tbz count, #3, 1f - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! -1: - tbz count, #2, 1f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -1: - tbz count, #1, 1f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -1: - tbz count, #0, 1f - ldrb tmp1w, [src, #-1] - strb tmp1w, [dst, #-1] -1: - ret - -.Lmov_not_short_up: - /* We don't much care about the alignment of DST, but we want SRC - * to be 128-bit (16 byte) aligned so that we don't cross cache line - * boundaries on both loads and stores. */ - ands tmp2, src, #15 /* Bytes to reach alignment. */ - b.eq 2f - sub count, count, tmp2 - /* Move enough data to reach alignment; unlike memcpy, we have to - * be aware of the overlap, which means we can't move data twice. */ - tbz tmp2, #3, 1f - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! -1: - tbz tmp2, #2, 1f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -1: - tbz tmp2, #1, 1f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -1: - tbz tmp2, #0, 1f - ldrb tmp1w, [src, #-1]! - strb tmp1w, [dst, #-1]! -1: - - /* There may be less than 63 bytes to go now. */ - cmp count, #63 - b.le .Ltail63up + sub tmp1, dstin, src + cmp count, 96 + ccmp tmp1, count, 2, hi + b.hs memcpy + + cbz tmp1, 3f + add dstend, dstin, count + add srcend, src, count + + /* Align dstend to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + and tmp1, dstend, 15 + ldp D_l, D_h, [srcend, -16] + sub srcend, srcend, tmp1 + sub count, count, tmp1 + ldp A_l, A_h, [srcend, -16] + stp D_l, D_h, [dstend, -16] + ldp B_l, B_h, [srcend, -32] + ldp C_l, C_h, [srcend, -48] + ldp D_l, D_h, [srcend, -64]! + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls 2f + nop +1: + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [srcend, -16] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [srcend, -48] + stp D_l, D_h, [dstend, -64]! + ldp D_l, D_h, [srcend, -64]! 
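The rewritten memmove above keeps only one case for itself, a large copy whose destination starts inside the source range, and tail-calls memcpy for everything else: copies of at most 96 bytes are read-before-write in that memcpy, and any other layout is safe to copy forwards. The sub/cmp/ccmp/b.hs sequence encodes exactly that test. A C sketch of the dispatch, with a byte loop standing in for the unrolled backward loop (illustrative; a standard library memcpy does not itself guarantee the overlap behaviour the paired assembly memcpy provides):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *memmove_dispatch_sketch(void *dst, const void *src, size_t n) {
    uintptr_t diff = (uintptr_t)dst - (uintptr_t)src;  // wraps to a huge value when dst < src
    if (n <= 96 || diff >= n) {
        // Small/medium copies and non-forward-overlapping layouts go to memcpy.
        return memcpy(dst, src, n);
    }
    // Forward overlap with a large count: copy backwards from the end.
    unsigned char *d = (unsigned char *)dst + n;
    const unsigned char *s = (const unsigned char *)src + n;
    while (n--)
        *--d = *--s;
    return dst;
}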
+ subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the start even if + there is just 1 byte left. */ 2: - subs count, count, #128 - b.ge .Lmov_body_large_up - /* Less than 128 bytes to move, so handle 64 here and then jump - * to the tail. */ - ldp A_l, A_h, [src, #-64]! - ldp B_l, B_h, [src, #16] - ldp C_l, C_h, [src, #32] - ldp D_l, D_h, [src, #48] - stp A_l, A_h, [dst, #-64]! - stp B_l, B_h, [dst, #16] - stp C_l, C_h, [dst, #32] - stp D_l, D_h, [dst, #48] - tst count, #0x3f - b.ne .Ltail63up - ret - - /* Critical loop. Start at a new Icache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. */ - .p2align 6 -.Lmov_body_large_up: - /* There are at least 128 bytes to move. */ - ldp A_l, A_h, [src, #-16] - ldp B_l, B_h, [src, #-32] - ldp C_l, C_h, [src, #-48] - ldp D_l, D_h, [src, #-64]! -1: - stp A_l, A_h, [dst, #-16] - ldp A_l, A_h, [src, #-16] - stp B_l, B_h, [dst, #-32] - ldp B_l, B_h, [src, #-32] - stp C_l, C_h, [dst, #-48] - ldp C_l, C_h, [src, #-48] - stp D_l, D_h, [dst, #-64]! - ldp D_l, D_h, [src, #-64]! - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst, #-16] - stp B_l, B_h, [dst, #-32] - stp C_l, C_h, [dst, #-48] - stp D_l, D_h, [dst, #-64]! - tst count, #0x3f - b.ne .Ltail63up - ret - - -.Ldownwards: - /* For a downwards move we can safely use memcpy provided that - * DST is more than 16 bytes away from SRC. */ - sub tmp1, src, #16 - cmp dstin, tmp1 - b.ls memcpy /* May overlap, but not critically. */ - - mov dst, dstin /* Preserve DSTIN for return value. */ - cmp count, #64 - b.ge .Lmov_not_short_down - - /* Deal with small moves quickly by dropping straight into the - * exit block. */ -.Ltail63down: - /* Move up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. */ - ands tmp1, count, #0x30 - b.eq .Ltail15down - add dst, dst, tmp1 - add src, src, tmp1 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src, #-48] - stp A_l, A_h, [dst, #-48] -1: - ldp A_l, A_h, [src, #-32] - stp A_l, A_h, [dst, #-32] -2: - ldp A_l, A_h, [src, #-16] - stp A_l, A_h, [dst, #-16] -.Ltail15down: - /* Move up to 15 bytes of data. Does not assume additional data - being moved. */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 1f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -1: - tbz count, #1, 1f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -1: - tbz count, #0, 1f - ldrb tmp1w, [src] - strb tmp1w, [dst] -1: - ret - -.Lmov_not_short_down: - /* We don't much care about the alignment of DST, but we want SRC - * to be 128-bit (16 byte) aligned so that we don't cross cache line - * boundaries on both loads and stores. */ - neg tmp2, src - ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ - b.eq 2f - sub count, count, tmp2 - /* Move enough data to reach alignment; unlike memcpy, we have to - * be aware of the overlap, which means we can't move data twice. */ - tbz tmp2, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz tmp2, #2, 1f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -1: - tbz tmp2, #1, 1f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -1: - tbz tmp2, #0, 1f - ldrb tmp1w, [src], #1 - strb tmp1w, [dst], #1 -1: - - /* There may be less than 63 bytes to go now. */ - cmp count, #63 - b.le .Ltail63down -2: - subs count, count, #128 - b.ge .Lmov_body_large_down - /* Less than 128 bytes to move, so handle 64 here and then jump - * to the tail. 
*/ - ldp A_l, A_h, [src] - ldp B_l, B_h, [src, #16] - ldp C_l, C_h, [src, #32] - ldp D_l, D_h, [src, #48] - stp A_l, A_h, [dst] - stp B_l, B_h, [dst, #16] - stp C_l, C_h, [dst, #32] - stp D_l, D_h, [dst, #48] - tst count, #0x3f - add src, src, #64 - add dst, dst, #64 - b.ne .Ltail63down - ret - - /* Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. */ - .p2align 6 -.Lmov_body_large_down: - /* There are at least 128 bytes to move. */ - ldp A_l, A_h, [src, #0] - sub dst, dst, #16 /* Pre-bias. */ - ldp B_l, B_h, [src, #16] - ldp C_l, C_h, [src, #32] - ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ -1: - stp A_l, A_h, [dst, #16] - ldp A_l, A_h, [src, #16] - stp B_l, B_h, [dst, #32] - ldp B_l, B_h, [src, #32] - stp C_l, C_h, [dst, #48] - ldp C_l, C_h, [src, #48] - stp D_l, D_h, [dst, #64]! - ldp D_l, D_h, [src, #64]! - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst, #16] - stp B_l, B_h, [dst, #32] - stp C_l, C_h, [dst, #48] - stp D_l, D_h, [dst, #64] - add src, src, #16 - add dst, dst, #64 + 16 - tst count, #0x3f - b.ne .Ltail63down - ret -#ifdef BCOPY -END(bcopy) -#elif defined(WMEMMOVE) + ldp E_l, E_h, [src, 48] + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [src, 32] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [src, 16] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [src] + stp D_l, D_h, [dstend, -64] + stp E_l, E_h, [dstin, 48] + stp A_l, A_h, [dstin, 32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin] +3: ret + +#if defined(WMEMMOVE) END(wmemmove) #else END(memmove) diff --git a/libc/arch-arm64/generic/bionic/memset.S b/libc/arch-arm64/generic/bionic/memset.S index 7c204b4..4b3b17b 100644 --- a/libc/arch-arm64/generic/bionic/memset.S +++ b/libc/arch-arm64/generic/bionic/memset.S @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, Linaro Limited +/* Copyright (c) 2012-2013, Linaro Limited All rights reserved. Redistribution and use in source and binary forms, with or without @@ -22,226 +22,207 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ /* Assumptions: * - * ARMv8-a, AArch64 - * Unaligned accesses + * ARMv8-a, AArch64, unaligned accesses * */ #include <private/bionic_asm.h> -/* By default we assume that the DC instruction can be used to zero - data blocks more efficiently. In some circumstances this might be - unsafe, for example in an asymmetric multiprocessor environment with - different DC clear lengths (neither the upper nor lower lengths are - safe to use). - - If code may be run in a virtualized environment, then define - MAYBE_VIRT. This will cause the code to cache the system register - values rather than re-reading them each call. */ - -#define dstin x0 -#ifdef BZERO -#define count x1 -#else -#define count x2 -#endif -#define val w1 -#define tmp1 x3 -#define tmp1w w3 -#define tmp2 x4 -#define tmp2w w4 -#define zva_len_x x5 -#define zva_len w5 -#define zva_bits_x x6 - -#define A_l x7 -#define A_lw w7 -#define dst x8 -#define tmp3w w9 - -#ifdef BZERO -ENTRY(bzero) -#else +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define tmp1 x5 +#define tmp1w w5 +#define tmp2 x6 +#define tmp2w w6 +#define zva_len x7 +#define zva_lenw w7 + +#define L(l) .L ## l + ENTRY(memset) -#endif - - mov dst, dstin /* Preserve return value. */ -#ifdef BZERO - b .Lzero_mem -#endif - ands A_lw, val, #255 - b.eq .Lzero_mem - orr A_lw, A_lw, A_lw, lsl #8 - orr A_lw, A_lw, A_lw, lsl #16 - orr A_l, A_l, A_l, lsl #32 -.Ltail_maybe_long: - cmp count, #64 - b.ge .Lnot_short -.Ltail_maybe_tiny: - cmp count, #15 - b.le .Ltail15tiny -.Ltail63: - ands tmp1, count, #0x30 - b.eq .Ltail15 - add dst, dst, tmp1 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - stp A_l, A_l, [dst, #-48] -1: - stp A_l, A_l, [dst, #-32] -2: - stp A_l, A_l, [dst, #-16] - -.Ltail15: - and count, count, #15 - add dst, dst, count - stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ - ret -.Ltail15tiny: - /* Set up to 15 bytes. Does not assume earlier memory - being set. */ - tbz count, #3, 1f - str A_l, [dst], #8 -1: - tbz count, #2, 1f - str A_lw, [dst], #4 -1: - tbz count, #1, 1f - strh A_lw, [dst], #2 -1: - tbz count, #0, 1f - strb A_lw, [dst] -1: + dup v0.16B, valw + add dstend, dstin, count + + cmp count, 96 + b.hi L(set_long) + cmp count, 16 + b.hs L(set_medium) + mov val, v0.D[0] + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend, -8] + ret + nop +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend, -2] +3: ret + + /* Set 17..96 bytes. */ +L(set_medium): + str q0, [dstin] + tbnz count, 6, L(set96) + str q0, [dstend, -16] + tbz count, 5, 1f + str q0, [dstin, 16] + str q0, [dstend, -32] +1: ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + str q0, [dstin, 16] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -32] ret - /* Critical loop. Start at a new cache line boundary. 
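Like the new memcpy, the memset above covers 0..96 bytes without a loop by anchoring stores at both the start and the end of the buffer, so the stores overlap rather than needing one path per length. A small C sketch of the 0..16-byte case (names are illustrative):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Fill 0..16 bytes with overlapping stores from both ends, mirroring the
// str val, [dstin] / str val, [dstend, -8] pattern in the assembly.
static void memset16_sketch(unsigned char *p, unsigned char c, size_t n) {
    uint64_t v = 0x0101010101010101ull * c;   // replicate the byte, like dup v0.16B
    if (n >= 8) {
        memcpy(p, &v, 8);
        memcpy(p + n - 8, &v, 8);             // overlaps the first store when n < 16
    } else if (n >= 4) {
        uint32_t w = (uint32_t)v;
        memcpy(p, &w, 4);
        memcpy(p + n - 4, &w, 4);
    } else if (n > 0) {
        p[0] = c;
        if (n > 1) {
            p[n - 1] = c;
            p[n >> 1] = c;
        }
    }
}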
Assuming - * 64 bytes per line, this ensures the entire loop is in one line. */ - .p2align 6 -.Lnot_short: - neg tmp2, dst - ands tmp2, tmp2, #15 - b.eq 2f - /* Bring DST to 128-bit (16-byte) alignment. We know that there's - * more than that to set, so we simply store 16 bytes and advance by - * the amount required to reach alignment. */ - sub count, count, tmp2 - stp A_l, A_l, [dst] - add dst, dst, tmp2 - /* There may be less than 63 bytes to go now. */ - cmp count, #63 - b.le .Ltail63 -2: - sub dst, dst, #16 /* Pre-bias. */ - sub count, count, #64 -1: - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - stp A_l, A_l, [dst, #48] - stp A_l, A_l, [dst, #64]! - subs count, count, #64 - b.ge 1b - tst count, #0x3f - add dst, dst, #16 - b.ne .Ltail63 + .p2align 3 + nop +L(set_long): + and valw, valw, 255 + bic dst, dstin, 15 + str q0, [dstin] + cmp count, 256 + ccmp valw, 0, 0, cs + b.eq L(try_zva) +L(no_zva): + sub count, dstend, dst /* Count is 16 too large. */ + add dst, dst, 16 + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +1: stp q0, q0, [dst], 64 + stp q0, q0, [dst, -32] +L(tail64): + subs count, count, 64 + b.hi 1b +2: stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] ret - /* For zeroing memory, check to see if we can use the ZVA feature to - * zero entire 'cache' lines. */ -.Lzero_mem: - mov A_l, #0 - cmp count, #63 - b.le .Ltail_maybe_tiny - neg tmp2, dst - ands tmp2, tmp2, #15 - b.eq 1f - sub count, count, tmp2 - stp A_l, A_l, [dst] - add dst, dst, tmp2 - cmp count, #63 - b.le .Ltail63 -1: - /* For zeroing small amounts of memory, it's not worth setting up - * the line-clear code. */ - cmp count, #128 - b.lt .Lnot_short -#ifdef MAYBE_VIRT - /* For efficiency when virtualized, we cache the ZVA capability. */ - adrp tmp2, .Lcache_clear - ldr zva_len, [tmp2, #:lo12:.Lcache_clear] - tbnz zva_len, #31, .Lnot_short - cbnz zva_len, .Lzero_by_line + .p2align 3 +L(try_zva): mrs tmp1, dczid_el0 - tbz tmp1, #4, 1f - /* ZVA not available. Remember this for next time. */ - mov zva_len, #~0 - str zva_len, [tmp2, #:lo12:.Lcache_clear] - b .Lnot_short -1: - mov tmp3w, #4 - and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ - lsl zva_len, tmp3w, zva_len - str zva_len, [tmp2, #:lo12:.Lcache_clear] -#else - mrs tmp1, dczid_el0 - tbnz tmp1, #4, .Lnot_short - mov tmp3w, #4 - and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ - lsl zva_len, tmp3w, zva_len -#endif - -.Lzero_by_line: - /* Compute how far we need to go to become suitably aligned. We're - * already at quad-word alignment. */ - cmp count, zva_len_x - b.lt .Lnot_short /* Not enough to reach alignment. */ - sub zva_bits_x, zva_len_x, #1 - neg tmp2, dst - ands tmp2, tmp2, zva_bits_x - b.eq 1f /* Already aligned. */ - /* Not aligned, check that there's enough to copy after alignment. */ - sub tmp1, count, tmp2 - cmp tmp1, #64 - ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ - b.lt .Lnot_short - /* We know that there's at least 64 bytes to zero and that it's safe - * to overrun by 64 bytes. */ - mov count, tmp1 -2: - stp A_l, A_l, [dst] - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - subs tmp2, tmp2, #64 - stp A_l, A_l, [dst, #48] - add dst, dst, #64 - b.ge 2b - /* We've overrun a bit, so adjust dst downwards. */ - add dst, dst, tmp2 -1: - sub count, count, zva_len_x -3: - dc zva, dst - add dst, dst, zva_len_x - subs count, count, zva_len_x - b.ge 3b - ands count, count, zva_bits_x - b.ne .Ltail_maybe_long + tbnz tmp1w, 4, L(no_zva) + and tmp1w, tmp1w, 15 + cmp tmp1w, 4 /* ZVA size is 64 bytes. 
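The L(try_zva) check above reads dczid_el0 and, for a zero fill of 256 bytes or more, switches to the DC ZVA paths that follow, which zero whole blocks at a time while the unaligned head and tail are written with ordinary stores. A structural C sketch, assuming a 64-byte block and at least two blocks of data, with the block-zeroing instruction abstracted behind a hypothetical zero_block() helper (not part of the patch):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define ZVA_BLOCK 64   // assumed block size; the assembly derives it from dczid_el0

// Hypothetical stand-in for 'dc zva, addr': zero one aligned block.
static void zero_block(unsigned char *p) { memset(p, 0, ZVA_BLOCK); }

static void zero_long_sketch(unsigned char *p, size_t n) {
    // Assumes n >= 2 * ZVA_BLOCK so the head and tail stay inside the buffer.
    unsigned char *end = p + n;
    uintptr_t mask = (uintptr_t)(ZVA_BLOCK - 1);
    unsigned char *aligned     = (unsigned char *)(((uintptr_t)p + mask) & ~mask);
    unsigned char *aligned_end = (unsigned char *)((uintptr_t)end & ~mask);

    memset(p, 0, (size_t)(aligned - p));                   // unaligned head
    for (unsigned char *q = aligned; q < aligned_end; q += ZVA_BLOCK)
        zero_block(q);                                     // whole blocks via DC ZVA
    memset(aligned_end, 0, (size_t)(end - aligned_end));   // unaligned tail
}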
*/ + b.ne L(zva_128) + + /* Write the first and last 64 byte aligned block using stp rather + than using DC ZVA. This is faster on some cores. + */ +L(zva_64): + str q0, [dst, 16] + stp q0, q0, [dst, 32] + bic dst, dst, 63 + stp q0, q0, [dst, 64] + stp q0, q0, [dst, 96] + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+64+64 /* Adjust count and bias for loop. */ + add dst, dst, 128 + nop +1: dc zva, dst + add dst, dst, 64 + subs count, count, 64 + b.hi 1b + stp q0, q0, [dst, 0] + stp q0, q0, [dst, 32] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] ret -#ifdef BZERO -END(bzero) -#else + + .p2align 3 +L(zva_128): + cmp tmp1w, 5 /* ZVA size is 128 bytes. */ + b.ne L(zva_other) + + str q0, [dst, 16] + stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64] + stp q0, q0, [dst, 96] + bic dst, dst, 127 + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+128 /* Adjust count and bias for loop. */ + add dst, dst, 128 +1: dc zva, dst + add dst, dst, 128 + subs count, count, 128 + b.hi 1b + stp q0, q0, [dstend, -128] + stp q0, q0, [dstend, -96] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +L(zva_other): + mov tmp2w, 4 + lsl zva_lenw, tmp2w, tmp1w + add tmp1, zva_len, 64 /* Max alignment bytes written. */ + cmp count, tmp1 + blo L(no_zva) + + sub tmp2, zva_len, 1 + add tmp1, dst, zva_len + add dst, dst, 16 + subs count, tmp1, dst /* Actual alignment bytes to write. */ + bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ + beq 2f +1: stp q0, q0, [dst], 64 + stp q0, q0, [dst, -32] + subs count, count, 64 + b.hi 1b +2: mov dst, tmp1 + sub count, dstend, tmp1 /* Remaining bytes to write. */ + subs count, count, zva_len + b.lo 4f +3: dc zva, dst + add dst, dst, zva_len + subs count, count, zva_len + b.hs 3b +4: add count, count, zva_len + b L(tail64) + END(memset) -#endif - -#ifdef MAYBE_VIRT - .bss - .p2align 2 -.Lcache_clear: - .space 4 -#endif diff --git a/libc/arch-arm64/generic/bionic/strlen.S b/libc/arch-arm64/generic/bionic/strlen.S index 3bd9809..6e540fc 100644 --- a/libc/arch-arm64/generic/bionic/strlen.S +++ b/libc/arch-arm64/generic/bionic/strlen.S @@ -1,16 +1,16 @@ -/* Copyright (c) 2014, Linaro Limited +/* Copyright (c) 2013-2015, Linaro Limited All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -22,16 +22,19 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64, unaligned accesses, min page size 4k. */ #include <private/bionic_asm.h> +/* To test the page crossing code path more thoroughly, compile with + -DTEST_PAGE_CROSS - this will force all calls through the slower + entry path. This option is not intended for production use. */ + /* Arguments and results. */ #define srcin x0 #define len x0 @@ -40,87 +43,185 @@ #define src x1 #define data1 x2 #define data2 x3 -#define data2a x4 -#define has_nul1 x5 -#define has_nul2 x6 -#define tmp1 x7 -#define tmp2 x8 -#define tmp3 x9 -#define tmp4 x10 -#define zeroones x11 -#define pos x12 +#define has_nul1 x4 +#define has_nul2 x5 +#define tmp1 x4 +#define tmp2 x5 +#define tmp3 x6 +#define tmp4 x7 +#define zeroones x8 + +#define L(l) .L ## l + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. A faster check + (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives + false hits for characters 129..255. */ #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 - /* Start of critial section -- keep to one 64Byte cache line. */ +#ifdef TEST_PAGE_CROSS +# define MIN_PAGE_SIZE 15 +#else +# define MIN_PAGE_SIZE 4096 +#endif + + /* Since strings are short on average, we check the first 16 bytes + of the string for a NUL character. In order to do an unaligned ldp + safely we have to do a page cross check first. If there is a NUL + byte we calculate the length from the 2 8-byte words using + conditional select to reduce branch mispredictions (it is unlikely + strlen will be repeatedly called on strings with the same length). + + If the string is longer than 16 bytes, we align src so don't need + further page cross checks, and process 32 bytes per iteration + using the fast NUL check. If we encounter non-ASCII characters, + fallback to a second loop using the full NUL check. + + If the page cross check fails, we read 16 bytes from an aligned + address, remove any characters before the string, and continue + in the main loop using aligned loads. Since strings crossing a + page in the first 16 bytes are rare (probability of + 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized. + + AArch64 systems have a minimum page size of 4k. We don't bother + checking for larger page sizes - the cost of setting up the correct + page size is just not worth the extra gain from a small reduction in + the cases taking the slow path. Note that we only care about + whether the first fetch, which may be misaligned, crosses a page + boundary. */ + ENTRY(strlen) - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. 
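Both the old and the new strlen rely on the zero-byte test quoted above: (X - 0x01..01) & ~X & 0x80..80 is non-zero exactly when some byte of X is zero, and the cheaper (X - 0x01..01) & 0x80..80 variant used in the new fast path can only give false hits for bytes 0x81..0xff. A C sketch of a word-at-a-time strlen built on the exact test, assuming little-endian order and the GCC/Clang __builtin_ctzll intrinsic; like the assembly, it reads the rest of an aligned 8-byte word past the terminator, which is fine for the mapped pages the assembly targets but not strictly portable C:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define REP8_01 0x0101010101010101ull
#define REP8_80 0x8080808080808080ull

static size_t strlen_sketch(const char *s) {
    const char *p = s;
    while ((uintptr_t)p & 7) {                // reach an 8-byte boundary byte by byte
        if (*p == '\0')
            return (size_t)(p - s);
        ++p;
    }
    for (;;) {
        uint64_t x;
        memcpy(&x, p, 8);                     // aligned 8-byte chunk
        uint64_t has_nul = (x - REP8_01) & ~x & REP8_80;
        if (has_nul) {
            // The lowest marked bit belongs to the first zero byte.
            return (size_t)(p - s) + (size_t)(__builtin_ctzll(has_nul) >> 3);
        }
        p += 8;
    }
}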
*/ - /* The inner loop deals with two Dwords at a time. This has a - slightly higher start-up cost, but we should win quite quickly, - especially on cores with a high number of issue slots per - cycle, as we get much better parallelism out of the operations. */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: + and tmp1, srcin, MIN_PAGE_SIZE - 1 + mov zeroones, REP8_01 + cmp tmp1, MIN_PAGE_SIZE - 16 + b.gt L(page_cross) + ldp data1, data2, [srcin] +#ifdef __AARCH64EB__ + /* For big-endian, carry propagation (if the final byte in the + string is 0x01) means we cannot use has_nul1/2 directly. + Since we expect strings to be small and early-exit, + byte-swap the data now so has_null1/2 will be correct. */ + rev data1, data1 + rev data2, data2 +#endif sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f + orr tmp2, data1, REP8_7f sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bics has_nul2, tmp3, tmp4 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ - b.eq .Lloop - /* End of critical section -- keep to one 64Byte cache line. */ + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(main_loop_entry) - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -#ifdef __AARCH64EB__ - mov data2, data1 -#endif - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: + /* Enter with C = has_nul1 == 0. */ + csel has_nul1, has_nul1, has_nul2, cc + mov len, 8 + rev has_nul1, has_nul1 + clz tmp1, has_nul1 + csel len, xzr, len, cc + add len, len, tmp1, lsr 3 + ret + + /* The inner loop processes 32 bytes per iteration and uses the fast + NUL check. If we encounter non-ASCII characters, use a second + loop with the accurate NUL check. */ + .p2align 4 +L(main_loop_entry): + bic src, srcin, 15 + sub src, src, 16 +L(main_loop): + ldp data1, data2, [src, 32]! +.Lpage_cross_entry: + sub tmp1, data1, zeroones + sub tmp3, data2, zeroones + orr tmp2, tmp1, tmp3 + tst tmp2, zeroones, lsl 7 + bne 1f + ldp data1, data2, [src, 16] + sub tmp1, data1, zeroones + sub tmp3, data2, zeroones + orr tmp2, tmp1, tmp3 + tst tmp2, zeroones, lsl 7 + beq L(main_loop) + add src, src, 16 +1: + /* The fast check failed, so do the slower, accurate NUL check. */ + orr tmp2, data1, REP8_7f + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(nonascii_loop) + + /* Enter with C = has_nul1 == 0. */ +L(tail): #ifdef __AARCH64EB__ /* For big-endian, carry propagation (if the final byte in the - string is 0x01) means we cannot use has_nul directly. The + string is 0x01) means we cannot use has_nul1/2 directly. The easiest way to get the correct byte is to byte-swap the data and calculate the syndrome a second time. */ - rev data2, data2 - sub tmp1, data2, zeroones - orr tmp2, data2, #REP8_7f - bic has_nul2, tmp1, tmp2 + csel data1, data1, data2, cc + rev data1, data1 + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + bic has_nul1, tmp1, tmp2 +#else + csel has_nul1, has_nul1, has_nul2, cc #endif - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ + sub len, src, srcin + rev has_nul1, has_nul1 + add tmp2, len, 8 + clz tmp1, has_nul1 + csel len, len, tmp2, cc + add len, len, tmp1, lsr 3 ret -.Lmisaligned: - cmp tmp1, #8 - neg tmp1, tmp1 - ldp data1, data2, [src], #16 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - mov tmp2, #~0 +L(nonascii_loop): + ldp data1, data2, [src, 16]! 
+ sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + bne L(tail) + ldp data1, data2, [src, 16]! + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(nonascii_loop) + b L(tail) + + /* Load 16 bytes from [srcin & ~15] and force the bytes that precede + srcin to 0x7f, so we ignore any NUL bytes before the string. + Then continue in the aligned loop. */ +L(page_cross): + bic src, srcin, 15 + ldp data1, data2, [src] + lsl tmp1, srcin, 3 + mov tmp4, -1 #ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ + /* Big-endian. Early bytes are at MSB. */ + lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ #else /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ + lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ #endif - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned + orr tmp1, tmp1, REP8_80 + orn data1, data1, tmp1 + orn tmp2, data2, tmp1 + tst srcin, 8 + csel data1, data1, tmp4, eq + csel data2, data2, tmp2, eq + b L(page_cross_entry) END(strlen) diff --git a/libc/arch-arm64/generic/bionic/strrchr.S b/libc/arch-arm64/generic/bionic/strrchr.S new file mode 100644 index 0000000..46b5031 --- /dev/null +++ b/libc/arch-arm64/generic/bionic/strrchr.S @@ -0,0 +1,171 @@ +/* + strrchr - find last instance of a character in a string + + Copyright (c) 2014, ARM Limited + All rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the company nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Neon Available. + */ + +#include <private/bionic_asm.h> + +/* Arguments and results. 
*/ +#define srcin x0 +#define chrin w1 + +#define result x0 + +#define src x2 +#define tmp1 x3 +#define wtmp2 w4 +#define tmp3 x5 +#define src_match x6 +#define src_offset x7 +#define const_m1 x8 +#define tmp4 x9 +#define nul_match x10 +#define chr_match x11 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_nul1 v3 +#define vhas_nul2 v4 +#define vhas_chr1 v5 +#define vhas_chr2 v6 +#define vrepmask_0 v7 +#define vrepmask_c v16 +#define vend1 v17 +#define vend2 v18 + +/* Core algorithm. + + For each 32-byte hunk we calculate a 64-bit syndrome value, with + two bits per byte (LSB is always in bits 0 and 1, for both big + and little-endian systems). For each tuple, bit 0 is set iff + the relevant byte matched the requested character; bit 1 is set + iff the relevant byte matched the NUL end of string (we trigger + off bit0 for the special case of looking for NUL). Since the bits + in the syndrome reflect exactly the order in which things occur + in the original string a count_trailing_zeros() operation will + identify exactly which byte is causing the termination, and why. */ + +/* Locals and temporaries. */ + +ENTRY(strrchr) + /* Magic constant 0x40100401 to allow us to identify which lane + matches the requested byte. Magic constant 0x80200802 used + similarly for NUL termination. */ + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ + dup vrepmask_c.4s, wtmp2 + mov src_offset, #0 + ands tmp1, srcin, #31 + add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ + b.eq .Laligned + + /* Input string is not 32-byte aligned. Rather than forcing + the padding bytes to a safe value, we calculate the syndrome + for all the bytes, but then mask off those bits of the + syndrome that are related to the padding. */ + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + neg tmp1, tmp1 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vhas_nul1.2d[0] + lsl tmp1, tmp1, #1 + mov const_m1, #~0 + mov chr_match, vhas_chr1.2d[0] + lsr tmp3, const_m1, tmp1 + + bic nul_match, nul_match, tmp3 // Mask padding bits. + bic chr_match, chr_match, tmp3 // Mask padding bits. 
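The comment above describes the syndrome encoding: for each 32-byte hunk the NEON code builds a 64-bit value with two bits per byte, bit 0 for "matches the requested character" and bit 1 for "is the NUL terminator", and the misaligned prologue simply masks off the bits belonging to padding bytes before the string. A simplified scalar C sketch of the same bookkeeping, using 8-byte hunks and one bit per byte (illustrative only; it stops reading at the terminator instead of masking a full vector):

#include <stddef.h>
#include <stdint.h>

static char *strrchr_sketch(const char *s, int c) {
    const unsigned char ch = (unsigned char)c;
    const char *p = s;
    const char *match_chunk = NULL;   // last hunk that contained a match
    uint8_t match_mask = 0;           // its per-byte match bits

    for (;;) {
        uint8_t chr = 0, nul = 0;
        for (int i = 0; i < 8; ++i) {
            unsigned char b = (unsigned char)p[i];
            if (b == ch)
                chr |= (uint8_t)(1u << i);    // bit i: byte i matches c (covers c == 0 too)
            if (b == '\0') {
                nul |= (uint8_t)(1u << i);    // bit i: byte i terminates the string
                break;                        // do not read past the terminator
            }
        }
        if (nul) {
            uint8_t keep = (uint8_t)(nul | (uint8_t)(nul - 1)); // positions up to the NUL
            chr &= keep;                      // mirrors the assembly's mask; a no-op here
            if (chr) {
                match_chunk = p;
                match_mask = chr;
            }
            if (match_mask == 0)
                return NULL;                  // character never seen
            int last = 7;
            while (!(match_mask & (1u << last)))
                --last;                       // highest set bit = last occurrence
            return (char *)(match_chunk + last);
        }
        if (chr) {                            // remember the most recent hunk with a match
            match_chunk = p;
            match_mask = chr;
        }
        p += 8;
    }
}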
+ cbnz nul_match, .Ltail + +.Lloop: + cmp chr_match, #0 + csel src_match, src, src_match, ne + csel src_offset, chr_match, src_offset, ne +.Laligned: + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + addp vend1.16b, vend1.16b, vend1.16b // 128->64 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vend1.2d[0] + mov chr_match, vhas_chr1.2d[0] + cbz nul_match, .Lloop + + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b + mov nul_match, vhas_nul1.2d[0] + +.Ltail: + /* Work out exactly where the string ends. */ + sub tmp4, nul_match, #1 + eor tmp4, tmp4, nul_match + ands chr_match, chr_match, tmp4 + /* And pick the values corresponding to the last match. */ + csel src_match, src, src_match, ne + csel src_offset, chr_match, src_offset, ne + + /* Count down from the top of the syndrome to find the last match. */ + clz tmp3, src_offset + /* Src_match points beyond the word containing the match, so we can + simply subtract half the bit-offset into the syndrome. Because + we are counting down, we need to go back one more character. */ + add tmp3, tmp3, #2 + sub result, src_match, tmp3, lsr #1 + /* But if the syndrome shows no match was found, then return NULL. */ + cmp src_offset, #0 + csel result, result, xzr, ne + + ret + +END(strrchr) diff --git a/libc/arch-arm64/generic/generic.mk b/libc/arch-arm64/generic/generic.mk index 1b595aa..4512dc5 100644 --- a/libc/arch-arm64/generic/generic.mk +++ b/libc/arch-arm64/generic/generic.mk @@ -11,4 +11,5 @@ libc_bionic_src_files_arm64 += \ arch-arm64/generic/bionic/strlen.S \ arch-arm64/generic/bionic/strncmp.S \ arch-arm64/generic/bionic/strnlen.S \ + arch-arm64/generic/bionic/strrchr.S \ arch-arm64/generic/bionic/wmemmove.S diff --git a/libc/arch-arm64/kryo/bionic/memcpy.S b/libc/arch-arm64/kryo/bionic/memcpy.S new file mode 100644 index 0000000..87e1b3b --- /dev/null +++ b/libc/arch-arm64/kryo/bionic/memcpy.S @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Prototype: void *memcpy (void *dst, const void *src, size_t count). + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + +ENTRY(__memcpy_chk) + cmp x2, x3 + b.hi __memcpy_chk_fail + + // Fall through to memcpy... + b memcpy +END(__memcpy_chk) + + .align 6 +ENTRY(memcpy) + #include "memcpy_base.S" +END(memcpy) + +ENTRY_PRIVATE(__memcpy_chk_fail) + // Preserve for accurate backtrace. + stp x29, x30, [sp, -16]! + .cfi_def_cfa_offset 16 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + + adrp x0, error_string + add x0, x0, :lo12:error_string + ldr x1, error_code + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW +END(__memcpy_chk_fail) + + .data + .align 2 +error_string: + .string "memcpy: prevented write past end of buffer" diff --git a/libc/arch-arm64/kryo/bionic/memcpy_base.S b/libc/arch-arm64/kryo/bionic/memcpy_base.S new file mode 100644 index 0000000..0096bb7 --- /dev/null +++ b/libc/arch-arm64/kryo/bionic/memcpy_base.S @@ -0,0 +1,244 @@ +/* Copyright (c) 2015 The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of The Linux Foundation nor the names of its contributors may + * be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
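(Illustrative sketch, not part of the patch.) The __memcpy_chk entry above is the FORTIFY_SOURCE hook: fortified call sites pass __builtin_object_size(dst, 0) as a fourth argument, so the assembly only has to compare x2 (count) against x3 (size) before falling through to memcpy; when the size is unknown the compiler passes (size_t)-1 and the check always passes. The C model below is hypothetical, and the failure path is shown as abort() where the real code tail-calls __fortify_chk_fail with the error string defined at the end of the file.

#include <stdlib.h>
#include <string.h>

static void* memcpy_chk_model(void* dst, const void* src, size_t count, size_t dst_size) {
  if (count > dst_size) {
    /* Real code: branch to __memcpy_chk_fail, which reports
       "memcpy: prevented write past end of buffer". */
    abort();
  }
  return memcpy(dst, src, count);
}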
+ */ + +#ifdef PLDOFFS +#undef PLDOFFS +#endif +#define PLDOFFS (16) + +#ifdef PLDTHRESH +#undef PLDTHRESH +#endif +#define PLDTHRESH (PLDOFFS) + +#ifdef BBTHRESH +#undef BBTHRESH +#endif +#define BBTHRESH (2048/128) + +#if (PLDOFFS < 1) +#error Routine does not support offsets less than 1 +#endif +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif + +#ifdef PLDSIZE +#undef PLDSIZE +#endif +#define PLDSIZE (128) + +kryo_bb_memcpy: + mov x11, x0 + cmp x2, #4 + blo kryo_bb_lt4 + cmp x2, #16 + blo kryo_bb_lt16 + cmp x2, #32 + blo kryo_bb_16 + cmp x2, #64 + blo kryo_bb_copy_32_a + cmp x2, #128 + blo kryo_bb_copy_64_a + + // we have at least 127 bytes to achieve 128-byte alignment + neg x3, x1 // calculate count to get SOURCE aligned + ands x3, x3, #0x7F + b.eq kryo_bb_source_aligned // already aligned + // alignment fixup, small to large (favorable alignment) + tbz x3, #0, 1f + ldrb w5, [x1], #1 + strb w5, [x0], #1 +1: tbz x3, #1, 2f + ldrh w6, [x1], #2 + strh w6, [x0], #2 +2: tbz x3, #2, 3f + ldr w8, [x1], #4 + str w8, [x0], #4 +3: tbz x3, #3, 4f + ldr x9, [x1], #8 + str x9, [x0], #8 +4: tbz x3, #4, 5f + ldr q7, [x1], #16 + str q7, [x0], #16 +5: tbz x3, #5, 55f + ldp q0, q1, [x1], #32 + stp q0, q1, [x0], #32 +55: tbz x3, #6, 6f + ldp q0, q1, [x1], #32 + ldp q2, q3, [x1], #32 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 +6: subs x2, x2, x3 // fixup count after alignment + b.eq kryo_bb_exit + cmp x2, #128 + blo kryo_bb_copy_64_a +kryo_bb_source_aligned: + lsr x12, x2, #7 + cmp x12, #PLDTHRESH + bls kryo_bb_copy_128_loop_nopld + + cmp x12, #BBTHRESH + bls kryo_bb_prime_pump + + add x14, x0, #0x400 + add x9, x1, #(PLDOFFS*PLDSIZE) + sub x14, x14, x9 + lsl x14, x14, #(21+32) + lsr x14, x14, #(21+32) + add x14, x14, #(PLDOFFS*PLDSIZE) + cmp x12, x14, lsr #7 + bls kryo_bb_prime_pump + + mov x9, #(PLDOFFS) + lsr x13, x14, #7 + subs x9, x13, x9 + bls kryo_bb_prime_pump + + add x10, x1, x14 + bic x10, x10, #0x7F // Round to multiple of PLDSIZE + + sub x12, x12, x14, lsr #7 + cmp x9, x12 + sub x13, x12, x9 + csel x12, x13, x12, LS + csel x9, x12, x9, HI + csel x12, xzr, x12, HI + + prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE)] + prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE+64)] +kryo_bb_copy_128_loop_outer_doublepld: + prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)] + prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)+64] + subs x9, x9, #1 + ldp q0, q1, [x1], #32 + ldp q2, q3, [x1], #32 + ldp q4, q5, [x1], #32 + ldp q6, q7, [x1], #32 + prfm PLDL1KEEP, [x10] + prfm PLDL1KEEP, [x10, #64] + add x10, x10, #128 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 + stp q4, q5, [x0], #32 + stp q6, q7, [x0], #32 + bne kryo_bb_copy_128_loop_outer_doublepld + cmp x12, #0 + beq kryo_bb_pop_before_nopld + cmp x12, #(448*1024/128) + bls kryo_bb_copy_128_loop_outer + +kryo_bb_copy_128_loop_ddr: + subs x12, x12, #1 + ldr x3, [x10], #128 + ldp q0, q1, [x1], #32 + ldp q2, q3, [x1], #32 + ldp q4, q5, [x1], #32 + ldp q6, q7, [x1], #32 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 + stp q4, q5, [x0], #32 + stp q6, q7, [x0], #32 + bne kryo_bb_copy_128_loop_ddr + b kryo_bb_pop_before_nopld + +kryo_bb_prime_pump: + mov x14, #(PLDOFFS*PLDSIZE) + add x10, x1, #(PLDOFFS*PLDSIZE) + bic x10, x10, #0x7F + sub x12, x12, #PLDOFFS + prfm PLDL1KEEP, [x10, #(-1*PLDSIZE)] + prfm PLDL1KEEP, [x10, #(-1*PLDSIZE+64)] + cmp x12, #(448*1024/128) + bhi kryo_bb_copy_128_loop_ddr + +kryo_bb_copy_128_loop_outer: + subs x12, x12, #1 + prfm PLDL1KEEP, [x10] + prfm PLDL1KEEP, [x10, #64] + ldp q0, q1, [x1], #32 + 
ldp q2, q3, [x1], #32 + ldp q4, q5, [x1], #32 + ldp q6, q7, [x1], #32 + add x10, x10, #128 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 + stp q4, q5, [x0], #32 + stp q6, q7, [x0], #32 + bne kryo_bb_copy_128_loop_outer + +kryo_bb_pop_before_nopld: + lsr x12, x14, #7 +kryo_bb_copy_128_loop_nopld: + ldp q0, q1, [x1], #32 + ldp q2, q3, [x1], #32 + ldp q4, q5, [x1], #32 + ldp q6, q7, [x1], #32 + subs x12, x12, #1 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 + stp q4, q5, [x0], #32 + stp q6, q7, [x0], #32 + bne kryo_bb_copy_128_loop_nopld + ands x2, x2, #0x7f + beq kryo_bb_exit + +kryo_bb_copy_64_a: + tbz x2, #6, kryo_bb_copy_32_a + ldp q0, q1, [x1], #32 + ldp q2, q3, [x1], #32 + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 +kryo_bb_copy_32_a: + tbz x2, #5, kryo_bb_16 + ldp q0, q1, [x1], #32 + stp q0, q1, [x0], #32 +kryo_bb_16: + tbz x2, #4, kryo_bb_lt16 + ldr q7, [x1], #16 + str q7, [x0], #16 + ands x2, x2, #0x0f + beq kryo_bb_exit +kryo_bb_lt16: + tbz x2, #3, kryo_bb_lt8 + ldr x3, [x1], #8 + str x3, [x0], #8 +kryo_bb_lt8: + tbz x2, #2, kryo_bb_lt4 + ldr w3, [x1], #4 + str w3, [x0], #4 +kryo_bb_lt4: + tbz x2, #1, kryo_bb_lt2 + ldrh w3, [x1], #2 + strh w3, [x0], #2 +kryo_bb_lt2: + tbz x2, #0, kryo_bb_exit + ldrb w3, [x1], #1 + strb w3, [x0], #1 +kryo_bb_exit: + mov x0, x11 + ret + diff --git a/libc/arch-arm64/kryo/bionic/memmove.S b/libc/arch-arm64/kryo/bionic/memmove.S new file mode 100644 index 0000000..739ce49 --- /dev/null +++ b/libc/arch-arm64/kryo/bionic/memmove.S @@ -0,0 +1,329 @@ +/* Copyright (c) 2014, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Unaligned accesses + * wchar_t is 4 bytes + */ + +#include <private/bionic_asm.h> + +/* Parameters and result. 
*/ +#define dstin x0 +#define src x1 +#define count x2 +#define tmp1 x3 +#define tmp1w w3 +#define tmp2 x4 +#define tmp2w w4 +#define tmp3 x5 +#define tmp3w w5 +#define dst x6 + +#define A_l x7 +#define A_h x8 +#define B_l x9 +#define B_h x10 +#define C_l x11 +#define C_h x12 +#define D_l x13 +#define D_h x14 + +#if defined(WMEMMOVE) +ENTRY(wmemmove) + lsl count, count, #2 +#else +ENTRY(memmove) +#endif + cmp dstin, src + b.lo .Ldownwards + add tmp1, src, count + cmp dstin, tmp1 + b.hs memcpy /* No overlap. */ + + /* Upwards move with potential overlap. + * Need to move from the tail backwards. SRC and DST point one + * byte beyond the remaining data to move. */ + add dst, dstin, count + add src, src, count + cmp count, #64 + b.ge .Lmov_not_short_up + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63up: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15up + sub dst, dst, tmp1 + sub src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #32] + stp A_l, A_h, [dst, #32] +1: + ldp A_l, A_h, [src, #16] + stp A_l, A_h, [dst, #16] +2: + ldp A_l, A_h, [src] + stp A_l, A_h, [dst] +.Ltail15up: + /* Move up to 15 bytes of data. Does not assume additional data + * being moved. */ + tbz count, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz count, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz count, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz count, #0, 1f + ldrb tmp1w, [src, #-1] + strb tmp1w, [dst, #-1] +1: + ret + +.Lmov_not_short_up: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + ands tmp2, src, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src, #-1]! + strb tmp1w, [dst, #-1]! +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63up +2: + subs count, count, #128 + b.ge .Lmov_body_large_up + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src, #-64]! + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst, #-64]! + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + b.ne .Ltail63up + ret + + /* Critical loop. Start at a new Icache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_up: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #-16] + ldp B_l, B_h, [src, #-32] + ldp C_l, C_h, [src, #-48] + ldp D_l, D_h, [src, #-64]! +1: + stp A_l, A_h, [dst, #-16] + ldp A_l, A_h, [src, #-16] + stp B_l, B_h, [dst, #-32] + ldp B_l, B_h, [src, #-32] + stp C_l, C_h, [dst, #-48] + ldp C_l, C_h, [src, #-48] + stp D_l, D_h, [dst, #-64]! + ldp D_l, D_h, [src, #-64]! 
+ subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #-16] + stp B_l, B_h, [dst, #-32] + stp C_l, C_h, [dst, #-48] + stp D_l, D_h, [dst, #-64]! + tst count, #0x3f + b.ne .Ltail63up + ret + + +.Ldownwards: + /* For a downwards move we can safely use memcpy provided that + * DST is more than 16 bytes away from SRC. */ + sub tmp1, src, #16 + cmp dstin, tmp1 + b.ls memcpy /* May overlap, but not critically. */ + + mov dst, dstin /* Preserve DSTIN for return value. */ + cmp count, #64 + b.ge .Lmov_not_short_down + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63down: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15down + add dst, dst, tmp1 + add src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #-48] + stp A_l, A_h, [dst, #-48] +1: + ldp A_l, A_h, [src, #-32] + stp A_l, A_h, [dst, #-32] +2: + ldp A_l, A_h, [src, #-16] + stp A_l, A_h, [dst, #-16] +.Ltail15down: + /* Move up to 15 bytes of data. Does not assume additional data + being moved. */ + tbz count, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz count, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz count, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz count, #0, 1f + ldrb tmp1w, [src] + strb tmp1w, [dst] +1: + ret + +.Lmov_not_short_down: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + neg tmp2, src + ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src], #1 + strb tmp1w, [dst], #1 +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63down +2: + subs count, count, #128 + b.ge .Lmov_body_large_down + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst] + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + add src, src, #64 + add dst, dst, #64 + b.ne .Ltail63down + ret + + /* Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_down: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #0] + sub dst, dst, #16 /* Pre-bias. */ + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ +1: + stp A_l, A_h, [dst, #16] + ldp A_l, A_h, [src, #16] + stp B_l, B_h, [dst, #32] + ldp B_l, B_h, [src, #32] + stp C_l, C_h, [dst, #48] + ldp C_l, C_h, [src, #48] + stp D_l, D_h, [dst, #64]! + ldp D_l, D_h, [src, #64]! 
+ subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #16] + stp B_l, B_h, [dst, #32] + stp C_l, C_h, [dst, #48] + stp D_l, D_h, [dst, #64] + add src, src, #16 + add dst, dst, #64 + 16 + tst count, #0x3f + b.ne .Ltail63down + ret +#if defined(WMEMMOVE) +END(wmemmove) +#else +END(memmove) +#endif diff --git a/libc/arch-arm64/kryo/kryo.mk b/libc/arch-arm64/kryo/kryo.mk new file mode 100644 index 0000000..f85638d --- /dev/null +++ b/libc/arch-arm64/kryo/kryo.mk @@ -0,0 +1,15 @@ +libc_bionic_src_files_arm64 += \ + arch-arm64/generic/bionic/memchr.S \ + arch-arm64/generic/bionic/memcmp.S \ + arch-arm64/kryo/bionic/memcpy.S \ + arch-arm64/kryo/bionic/memmove.S \ + arch-arm64/generic/bionic/memset.S \ + arch-arm64/generic/bionic/stpcpy.S \ + arch-arm64/generic/bionic/strchr.S \ + arch-arm64/generic/bionic/strcmp.S \ + arch-arm64/generic/bionic/strcpy.S \ + arch-arm64/generic/bionic/strlen.S \ + arch-arm64/generic/bionic/strncmp.S \ + arch-arm64/generic/bionic/strnlen.S \ + arch-arm64/generic/bionic/strrchr.S \ + arch-arm64/generic/bionic/wmemmove.S diff --git a/libc/arch-x86_64/string/sse2-memmove-slm.S b/libc/arch-x86_64/string/sse2-memmove-slm.S index 0dbffad..6a5afd6 100644 --- a/libc/arch-x86_64/string/sse2-memmove-slm.S +++ b/libc/arch-x86_64/string/sse2-memmove-slm.S @@ -91,9 +91,6 @@ name: \ .section .text.sse2,"ax",@progbits ENTRY (MEMMOVE) ENTRANCE -#ifdef USE_AS_BCOPY - xchg %rsi, %rdi -#endif mov %rdi, %rax /* Check whether we should copy backward or forward. */ diff --git a/libc/bionic/legacy_32_bit_support.cpp b/libc/bionic/legacy_32_bit_support.cpp index a107664..1336b43 100644 --- a/libc/bionic/legacy_32_bit_support.cpp +++ b/libc/bionic/legacy_32_bit_support.cpp @@ -91,3 +91,22 @@ int getrlimit64(int resource, rlimit64* limits64) { int setrlimit64(int resource, const rlimit64* limits64) { return prlimit64(0, resource, limits64, NULL); } + +// There is no prlimit system call, so we need to use prlimit64. +int prlimit(pid_t pid, int resource, const rlimit* n32, rlimit* o32) { + rlimit64 n64; + if (n32 != nullptr) { + n64.rlim_cur = (n32->rlim_cur == RLIM_INFINITY) ? RLIM64_INFINITY : n32->rlim_cur; + n64.rlim_max = (n32->rlim_max == RLIM_INFINITY) ? RLIM64_INFINITY : n32->rlim_max; + } + + rlimit64 o64; + int result = prlimit64(pid, resource, + (n32 != nullptr) ? &n64 : nullptr, + (o32 != nullptr) ? &o64 : nullptr); + if (result != -1 && o32 != nullptr) { + o32->rlim_cur = (o64.rlim_cur == RLIM64_INFINITY) ? RLIM_INFINITY : o64.rlim_cur; + o32->rlim_max = (o64.rlim_max == RLIM64_INFINITY) ? 
RLIM_INFINITY : o64.rlim_max; + } + return result; +} diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp index bd71628..ecde8d4 100644 --- a/libc/bionic/libc_init_common.cpp +++ b/libc/bionic/libc_init_common.cpp @@ -250,6 +250,7 @@ static bool __is_unsafe_environment_variable(const char* name) { "LD_ORIGIN_PATH", "LD_PRELOAD", "LD_PROFILE", + "LD_SHIM_LIBS", "LD_SHOW_AUXV", "LD_USE_LOAD_BIAS", "LOCALDOMAIN", diff --git a/libc/bionic/malloc_debug_check.cpp b/libc/bionic/malloc_debug_check.cpp index dee03fa..ad0e613 100644 --- a/libc/bionic/malloc_debug_check.cpp +++ b/libc/bionic/malloc_debug_check.cpp @@ -45,6 +45,7 @@ #include <time.h> #include <unistd.h> #include <unwind.h> +#include <signal.h> #include "debug_mapinfo.h" #include "debug_stacktrace.h" @@ -55,6 +56,14 @@ #include "private/libc_logging.h" #include "private/ScopedPthreadMutexLocker.h" +static unsigned int malloc_sig_enabled = 0; +static unsigned int min_allocation_report_limit; +static unsigned int max_allocation_limit; +static const char* process_name; +static size_t total_count = 0; +static bool isDumped = false; +static bool sigHandled = false; + #define MAX_BACKTRACE_DEPTH 16 #define ALLOCATION_TAG 0x1ee7d00d #define BACKLOG_TAG 0xbabecafe @@ -63,6 +72,11 @@ #define FRONT_GUARD_LEN (1<<5) #define REAR_GUARD 0xbb #define REAR_GUARD_LEN (1<<5) +#define FRONT_GUARD_SS 0xab +#define DEBUG_SIGNAL SIGWINCH + +static void malloc_sigaction(int signum, siginfo_t * sg, void * cxt); +static struct sigaction default_sa; static void log_message(const char* format, ...) { va_list args; @@ -135,9 +149,14 @@ static inline void init_front_guard(hdr_t* hdr) { memset(hdr->front_guard, FRONT_GUARD, FRONT_GUARD_LEN); } +static inline void set_snapshot(hdr_t* hdr) { + memset(hdr->front_guard, FRONT_GUARD_SS, FRONT_GUARD_LEN); +} + static inline bool is_front_guard_valid(hdr_t* hdr) { for (size_t i = 0; i < FRONT_GUARD_LEN; i++) { - if (hdr->front_guard[i] != FRONT_GUARD) { + if (!((hdr->front_guard[i] == FRONT_GUARD) || + (hdr->front_guard[i] == FRONT_GUARD_SS))) { return false; } } @@ -171,6 +190,9 @@ static inline bool is_rear_guard_valid(hdr_t* hdr) { } static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) { + if (hdr->tag == ALLOCATION_TAG) { + total_count += hdr->size; + } hdr->prev = NULL; hdr->next = *head; if (*head) @@ -181,6 +203,9 @@ static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) { } static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) { + if (hdr->tag == ALLOCATION_TAG) { + total_count -= hdr->size; + } if (hdr->prev) { hdr->prev->next = hdr->next; } else { @@ -194,6 +219,25 @@ static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) { return 0; } +static void snapshot_report_leaked_nodes() { + log_message("%s: %s\n", __FILE__, __FUNCTION__); + hdr_t * iterator = head; + size_t total_size = 0; + do { + if (iterator->front_guard[0] == FRONT_GUARD && + iterator->size >= min_allocation_report_limit) { + log_message("obj %p, size %d", iterator, iterator->size); + total_size += iterator->size; + log_backtrace(iterator->bt, iterator->bt_depth); + log_message("------------------------------"); // as an end marker + // Marking the node as we do not want to print it again. 
+ set_snapshot(iterator); + } + iterator = iterator->next; + } while (iterator); + log_message("Total Pending allocations after last snapshot: %d", total_size); +} + static inline void add(hdr_t* hdr, size_t size) { ScopedPthreadMutexLocker locker(&lock); hdr->tag = ALLOCATION_TAG; @@ -202,6 +246,11 @@ static inline void add(hdr_t* hdr, size_t size) { init_rear_guard(hdr); ++g_allocated_block_count; add_locked(hdr, &tail, &head); + if ((total_count >= max_allocation_limit) && !isDumped && malloc_sig_enabled) { + isDumped = true; + sigHandled = true; // Need to bypass the snapshot + kill(getpid(), DEBUG_SIGNAL); + } } static inline int del(hdr_t* hdr) { @@ -233,7 +282,8 @@ static bool was_used_after_free(hdr_t* hdr) { static inline int check_guards(hdr_t* hdr, int* safe) { *safe = 1; if (!is_front_guard_valid(hdr)) { - if (hdr->front_guard[0] == FRONT_GUARD) { + if ((hdr->front_guard[0] == FRONT_GUARD) || + ((hdr->front_guard[0] == FRONT_GUARD_SS))) { log_message("+++ ALLOCATION %p SIZE %d HAS A CORRUPTED FRONT GUARD\n", user(hdr), hdr->size); } else { @@ -656,6 +706,42 @@ extern "C" bool malloc_debug_initialize(HashTable* hash_table, const MallocDebug __libc_format_log(ANDROID_LOG_INFO, "libc", "not gathering backtrace information\n"); } + if (__system_property_get("libc.debug.malloc", env)) { + if(atoi(env) == 40) malloc_sig_enabled = 1; + } + + if (malloc_sig_enabled) { + char debug_proc_size[PROP_VALUE_MAX]; + if (__system_property_get("libc.debug.malloc.maxprocsize", debug_proc_size)) + max_allocation_limit = atoi(debug_proc_size); + else + max_allocation_limit = 30 * 1024 * 1024; // In Bytes [Default is 30 MB] + if (__system_property_get("libc.debug.malloc.minalloclim", debug_proc_size)) + min_allocation_report_limit = atoi(debug_proc_size); + else + min_allocation_report_limit = 10 * 1024; // In Bytes [Default is 10 KB] + process_name = getprogname(); + } + +/* Initializes malloc debugging framework. + * See comments on MallocDebugInit in malloc_debug_common.h + */ + if (malloc_sig_enabled) { + struct sigaction sa; //local or static? 
+ sa.sa_handler = NULL; + sa.sa_sigaction = malloc_sigaction; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, DEBUG_SIGNAL); + sa.sa_flags = SA_SIGINFO; + sa.sa_restorer = NULL; + if (sigaction(DEBUG_SIGNAL, &sa, &default_sa) < 0) { + log_message("Failed to register signal handler w/ errno %s", strerror(errno)); + malloc_sig_enabled = 0; + } else { + log_message("Registered signal handler"); + sigHandled = false; + } + } if (g_backtrace_enabled) { backtrace_startup(); } @@ -668,9 +754,66 @@ extern "C" void malloc_debug_finalize(int malloc_debug_level) { if (malloc_debug_level == 10) { ReportMemoryLeaks(); } + if (malloc_sig_enabled) { + log_message("Deregister %d signal handler", DEBUG_SIGNAL); + sigaction(DEBUG_SIGNAL, &default_sa, NULL); + malloc_sig_enabled = 0; + sigHandled = false; + } if (g_backtrace_enabled) { backtrace_shutdown(); } pthread_setspecific(g_debug_calls_disabled, NULL); } + +static void snapshot_nodes_locked() { + log_message("%s: %s\n", __FILE__, __FUNCTION__); + hdr_t * iterator = head; + do { + if (iterator->front_guard[0] == FRONT_GUARD) { + set_snapshot(iterator); + } + iterator = iterator->next; + } while (iterator); +} + +static void malloc_sigaction(int signum, siginfo_t * info, void * context) +{ + log_message("%s: %s\n", __FILE__, __FUNCTION__); + log_message("%s got %d signal from PID: %d (context:%x)\n", + __func__, signum, info->si_pid, context); + + if (signum != DEBUG_SIGNAL) { + log_message("RECEIVED %d instead of %d\n", signum, DEBUG_SIGNAL); + return; + } + + log_message("Process under observation:%s", process_name); + log_message("Maximum process size limit:%d Bytes", max_allocation_limit); + log_message("Won't print allocation below %d Bytes", min_allocation_report_limit); + log_message("Total count: %d\n", total_count); + + if (!head) { + log_message("No allocations?"); + return; + } + // If sigHandled is false, meaning it's being handled first time + if (!sigHandled) { + sigHandled = true; + // Marking the nodes assuming that they should not be leaked nodes. 
+ snapshot_nodes_locked(); + } else { + // We need to print new allocations now + log_message("Start dumping allocations of the process %s", process_name); + log_message("+++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ ***\n"); + + // Print allocations of the process + if (g_backtrace_enabled) + snapshot_report_leaked_nodes(); + + log_message("*** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++ *** +++\n"); + log_message("Completed dumping allocations of the process %s", process_name); + } + return; +} diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp index ee796c6..12fc6dd 100644 --- a/libc/bionic/malloc_debug_common.cpp +++ b/libc/bionic/malloc_debug_common.cpp @@ -396,6 +396,9 @@ static void malloc_init_impl() { } so_name = "libc_malloc_debug_qemu.so"; break; + case 40: + so_name = "libc_malloc_debug_leak.so"; + break; default: error_log("%s: Debug level %d is unknown\n", getprogname(), g_malloc_debug_level); return; @@ -456,6 +459,9 @@ static void malloc_init_impl() { case 20: InitMalloc(malloc_impl_handle, &malloc_dispatch_table, "qemu_instrumented"); break; + case 40: + InitMalloc(malloc_impl_handle, &malloc_dispatch_table, "chk"); + break; default: break; } diff --git a/libc/bionic/mmap.cpp b/libc/bionic/mmap.cpp index 8f25a89..53e8b46 100644 --- a/libc/bionic/mmap.cpp +++ b/libc/bionic/mmap.cpp @@ -36,6 +36,11 @@ extern "C" void* __mmap2(void*, size_t, int, int, int, size_t); #define MMAP2_SHIFT 12 // 2**12 == 4096 +#ifdef LEGACY_MMAP +#define TO_64(a) ((a) & 0x00000000ffffffff) +#else +#define TO_64(a) (a) +#endif static bool kernel_has_MADV_MERGEABLE = true; @@ -60,5 +65,5 @@ void* mmap64(void* addr, size_t size, int prot, int flags, int fd, off64_t offse } void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) { - return mmap64(addr, size, prot, flags, fd, static_cast<off64_t>(offset)); + return mmap64(addr, size, prot, flags, fd, TO_64(static_cast<off64_t>(offset))); } diff --git a/libc/dns/net/getaddrinfo.c b/libc/dns/net/getaddrinfo.c index 829b679..cc8b8b4 100644 --- a/libc/dns/net/getaddrinfo.c +++ b/libc/dns/net/getaddrinfo.c @@ -1791,10 +1791,14 @@ _find_src_addr(const struct sockaddr *addr, struct sockaddr *src_addr, unsigned return -1; } } - if (mark != MARK_UNSET && setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0) + if (mark != MARK_UNSET && setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0) { + close(sock); return 0; - if (uid > 0 && uid != NET_CONTEXT_INVALID_UID && fchown(sock, uid, (gid_t)-1) < 0) + } + if (uid > 0 && uid != NET_CONTEXT_INVALID_UID && fchown(sock, uid, (gid_t)-1) < 0) { + close(sock); return 0; + } do { ret = __connect(sock, addr, len); } while (ret == -1 && errno == EINTR); diff --git a/libc/include/libgen.h b/libc/include/libgen.h index e89328e..4d22d15 100644 --- a/libc/include/libgen.h +++ b/libc/include/libgen.h @@ -32,18 +32,19 @@ #include <sys/cdefs.h> #include <sys/types.h> + __BEGIN_DECLS -#if !defined(__bionic_using_gnu_basename) /* - * <string.h> gets you the GNU basename. - * <libgen.h> the POSIX one. - * Note that our "POSIX" one has the wrong argument cv-qualifiers, but doesn't - * modify its input and uses thread-local storage for the result if necessary. + * Including <string.h> will get you the GNU basename, unless <libgen.h> is + * included, either before or after including <string.h>. 
+ * + * Note that this has the wrong argument cv-qualifiers, but doesn't modify its + * input and uses thread-local storage for the result if necessary. */ -extern char* basename(const char*); -#define __bionic_using_posix_basename -#endif +extern char* __posix_basename(const char*) __RENAME(basename); + +#define basename __posix_basename /* This has the wrong argument cv-qualifiers, but doesn't modify its input and uses thread-local storage for the result if necessary. */ extern char* dirname(const char*); diff --git a/libc/include/paths.h b/libc/include/paths.h index 82c2804..7700cdd 100644 --- a/libc/include/paths.h +++ b/libc/include/paths.h @@ -33,6 +33,7 @@ #define _PATHS_H_ #define _PATH_BSHELL "/system/bin/sh" +#define _PATH_BSHELL2 "/sbin/sh" #define _PATH_CONSOLE "/dev/console" #define _PATH_DEFPATH "/sbin:/vendor/bin:/system/sbin:/system/bin:/system/xbin" #define _PATH_DEV "/dev/" diff --git a/libc/include/regex.h b/libc/include/regex.h index aec38e3..b06a515 100644 --- a/libc/include/regex.h +++ b/libc/include/regex.h @@ -42,8 +42,9 @@ #include <sys/cdefs.h> #include <sys/types.h> -/* types */ -typedef off_t regoff_t; +/* POSIX says regoff_t is at least as large as the larger of ptrdiff_t and + * ssize_t. BSD uses off_t, but that interacts badly with _FILE_OFFSET_BITS. */ +typedef ssize_t regoff_t; typedef struct { int re_magic; diff --git a/libc/include/string.h b/libc/include/string.h index d32c164..8ceccd5 100644 --- a/libc/include/string.h +++ b/libc/include/string.h @@ -107,18 +107,18 @@ extern size_t strxfrm(char* __restrict, const char* __restrict, size_t); extern int strcoll_l(const char *, const char *, locale_t) __purefunc; extern size_t strxfrm_l(char* __restrict, const char* __restrict, size_t, locale_t); -#if defined(__USE_GNU) && !defined(__bionic_using_posix_basename) +#if defined(__USE_GNU) && !defined(basename) /* * glibc has a basename in <string.h> that's different to the POSIX one in <libgen.h>. * It doesn't modify its argument, and in C++ it's const-correct. */ + #if defined(__cplusplus) extern "C++" char* basename(char*) __RENAME(__gnu_basename) __nonnull((1)); extern "C++" const char* basename(const char*) __RENAME(__gnu_basename) __nonnull((1)); #else extern char* basename(const char*) __RENAME(__gnu_basename) __nonnull((1)); #endif -#define __bionic_using_gnu_basename #endif extern void* __memchr_chk(const void*, int, size_t, size_t); diff --git a/libc/include/sys/resource.h b/libc/include/sys/resource.h index 3f8dd45..8209dfb 100644 --- a/libc/include/sys/resource.h +++ b/libc/include/sys/resource.h @@ -53,10 +53,7 @@ extern int setpriority(int, int, int); extern int getrusage(int, struct rusage*); -#if __LP64__ -/* Implementing prlimit for 32-bit isn't worth the effort. 
*/ extern int prlimit(pid_t, int, const struct rlimit*, struct rlimit*); -#endif extern int prlimit64(pid_t, int, const struct rlimit64*, struct rlimit64*); __END_DECLS diff --git a/libc/kernel/uapi/linux/android_alarm.h b/libc/kernel/uapi/linux/android_alarm.h index 801a01e..9f2de28 100644 --- a/libc/kernel/uapi/linux/android_alarm.h +++ b/libc/kernel/uapi/linux/android_alarm.h @@ -28,28 +28,31 @@ enum android_alarm_type { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ ANDROID_ALARM_ELAPSED_REALTIME, ANDROID_ALARM_SYSTEMTIME, + ANDROID_ALARM_RTC_POWEROFF_WAKEUP, ANDROID_ALARM_TYPE_COUNT, -}; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +}; enum android_alarm_return_flags { ANDROID_ALARM_RTC_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_WAKEUP, ANDROID_ALARM_RTC_MASK = 1U << ANDROID_ALARM_RTC, - ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP, /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP, ANDROID_ALARM_ELAPSED_REALTIME_MASK = 1U << ANDROID_ALARM_ELAPSED_REALTIME, ANDROID_ALARM_SYSTEMTIME_MASK = 1U << ANDROID_ALARM_SYSTEMTIME, + ANDROID_ALARM_RTC_POWEROFF_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_POWEROFF_WAKEUP, +/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ ANDROID_ALARM_TIME_CHANGE_MASK = 1U << 16 }; -/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ANDROID_ALARM_CLEAR(type) _IO('a', 0 | ((type) << 4)) #define ANDROID_ALARM_WAIT _IO('a', 1) +/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ALARM_IOW(c,type,size) _IOW('a', (c) | ((type) << 4), size) #define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec) -/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec) #define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec) +/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec) #define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0))) -/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4) #endif +/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ diff --git a/libc/kernel/uapi/linux/if_packet.h b/libc/kernel/uapi/linux/if_packet.h index 6e9ae6a..ba283c6 100644 --- a/libc/kernel/uapi/linux/if_packet.h +++ b/libc/kernel/uapi/linux/if_packet.h @@ -119,143 +119,145 @@ struct tpacket_auxdata { #define TP_STATUS_VLAN_VALID (1 << 4) #define TP_STATUS_BLK_TMO (1 << 5) #define TP_STATUS_VLAN_TPID_VALID (1 << 6) -#define TP_STATUS_AVAILABLE 0 +#define TP_STATUS_CSUM_UNNECESSARY (1 << 7) /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +#define TP_STATUS_AVAILABLE 0 #define TP_STATUS_SEND_REQUEST (1 << 0) #define TP_STATUS_SENDING (1 << 1) #define TP_STATUS_WRONG_FORMAT (1 << 2) -#define TP_STATUS_TS_SOFTWARE (1 << 29) /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +#define TP_STATUS_TS_SOFTWARE (1 << 29) #define TP_STATUS_TS_SYS_HARDWARE (1 << 30) #define TP_STATUS_TS_RAW_HARDWARE (1 << 31) #define TP_FT_REQ_FILL_RXHASH 0x1 -struct tpacket_hdr { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +struct tpacket_hdr { 
unsigned long tp_status; unsigned int tp_len; unsigned int tp_snaplen; - unsigned short tp_mac; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + unsigned short tp_mac; unsigned short tp_net; unsigned int tp_sec; unsigned int tp_usec; -}; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +}; #define TPACKET_ALIGNMENT 16 #define TPACKET_ALIGN(x) (((x) + TPACKET_ALIGNMENT - 1) & ~(TPACKET_ALIGNMENT - 1)) #define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll)) -struct tpacket2_hdr { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +struct tpacket2_hdr { __u32 tp_status; __u32 tp_len; __u32 tp_snaplen; - __u16 tp_mac; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u16 tp_mac; __u16 tp_net; __u32 tp_sec; __u32 tp_nsec; - __u16 tp_vlan_tci; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u16 tp_vlan_tci; __u16 tp_vlan_tpid; __u8 tp_padding[4]; }; -struct tpacket_hdr_variant1 { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +struct tpacket_hdr_variant1 { __u32 tp_rxhash; __u32 tp_vlan_tci; __u16 tp_vlan_tpid; - __u16 tp_padding; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u16 tp_padding; }; struct tpacket3_hdr { __u32 tp_next_offset; - __u32 tp_sec; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u32 tp_sec; __u32 tp_nsec; __u32 tp_snaplen; __u32 tp_len; - __u32 tp_status; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u32 tp_status; __u16 tp_mac; __u16 tp_net; union { - struct tpacket_hdr_variant1 hv1; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + struct tpacket_hdr_variant1 hv1; }; __u8 tp_padding[8]; }; -struct tpacket_bd_ts { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +struct tpacket_bd_ts { unsigned int ts_sec; union { unsigned int ts_usec; - unsigned int ts_nsec; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + unsigned int ts_nsec; }; }; struct tpacket_hdr_v1 { - __u32 block_status; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u32 block_status; __u32 num_pkts; __u32 offset_to_first_pkt; __u32 blk_len; - __aligned_u64 seq_num; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __aligned_u64 seq_num; struct tpacket_bd_ts ts_first_pkt, ts_last_pkt; }; union tpacket_bd_header_u { - struct tpacket_hdr_v1 bh1; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + struct tpacket_hdr_v1 bh1; }; struct tpacket_block_desc { __u32 version; - __u32 offset_to_priv; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + __u32 offset_to_priv; union tpacket_bd_header_u hdr; }; #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) -#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll)) /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll)) enum tpacket_versions { TPACKET_V1, TPACKET_V2, - TPACKET_V3 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + TPACKET_V3 }; struct tpacket_req { unsigned int tp_block_size; - unsigned int tp_block_nr; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - 
SEE TOP FOR INSTRUCTIONS */ + unsigned int tp_block_nr; unsigned int tp_frame_size; unsigned int tp_frame_nr; }; -struct tpacket_req3 { /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +struct tpacket_req3 { unsigned int tp_block_size; unsigned int tp_block_nr; unsigned int tp_frame_size; - unsigned int tp_frame_nr; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + unsigned int tp_frame_nr; unsigned int tp_retire_blk_tov; unsigned int tp_sizeof_priv; unsigned int tp_feature_req_word; -}; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +}; union tpacket_req_u { struct tpacket_req req; struct tpacket_req3 req3; -}; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +}; struct packet_mreq { int mr_ifindex; unsigned short mr_type; - unsigned short mr_alen; /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ + unsigned short mr_alen; unsigned char mr_address[8]; }; #define PACKET_MR_MULTICAST 0 -#define PACKET_MR_PROMISC 1 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +#define PACKET_MR_PROMISC 1 #define PACKET_MR_ALLMULTI 2 #define PACKET_MR_UNICAST 3 #endif +/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ diff --git a/libc/kernel/uapi/linux/time.h b/libc/kernel/uapi/linux/time.h index bf245fc..5690d27 100644 --- a/libc/kernel/uapi/linux/time.h +++ b/libc/kernel/uapi/linux/time.h @@ -67,9 +67,10 @@ struct itimerval { #define CLOCK_SGI_CYCLE 10 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ #define CLOCK_TAI 11 +#define CLOCK_POWEROFF_ALARM 12 #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) -#define CLOCKS_MONO CLOCK_MONOTONIC /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */ +#define CLOCKS_MONO CLOCK_MONOTONIC #define TIMER_ABSTIME 0x01 #endif diff --git a/libc/upstream-netbsd/lib/libc/gen/popen.c b/libc/upstream-netbsd/lib/libc/gen/popen.c index 593e346..b6ce47c 100644 --- a/libc/upstream-netbsd/lib/libc/gen/popen.c +++ b/libc/upstream-netbsd/lib/libc/gen/popen.c @@ -152,6 +152,8 @@ popen(const char *command, const char *type) } execl(_PATH_BSHELL, "sh", "-c", command, NULL); + if (errno == ENOENT) + execl(_PATH_BSHELL2, "sh", "-c", command, NULL); _exit(127); /* NOTREACHED */ } diff --git a/libm/Android.mk b/libm/Android.mk index e919129..f053e25 100644 --- a/libm/Android.mk +++ b/libm/Android.mk @@ -107,8 +107,6 @@ LOCAL_SRC_FILES := \ upstream-freebsd/lib/msun/src/s_exp2.c \ upstream-freebsd/lib/msun/src/s_exp2f.c \ upstream-freebsd/lib/msun/src/s_expm1f.c \ - upstream-freebsd/lib/msun/src/s_fabs.c \ - upstream-freebsd/lib/msun/src/s_fabsf.c \ upstream-freebsd/lib/msun/src/s_fdim.c \ upstream-freebsd/lib/msun/src/s_finite.c \ upstream-freebsd/lib/msun/src/s_finitef.c \ @@ -174,7 +172,6 @@ LOCAL_SRC_FILES_64 := \ upstream-freebsd/lib/msun/src/s_copysignl.c \ upstream-freebsd/lib/msun/src/e_coshl.c \ upstream-freebsd/lib/msun/src/s_cosl.c \ - upstream-freebsd/lib/msun/src/s_fabsl.c \ upstream-freebsd/lib/msun/src/s_floorl.c \ upstream-freebsd/lib/msun/src/s_fmal.c \ upstream-freebsd/lib/msun/src/s_fmaxl.c \ @@ -227,6 +224,10 @@ LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \ signbit.c \ +# Home-grown stuff. +LOCAL_SRC_FILES += \ + fabs.cpp \ + # Arch specific optimizations. 
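(Illustrative sketch, not part of the patch.) The Android.mk hunk above drops the FreeBSD s_fabs*.c sources in favour of a home-grown fabs.cpp, whose contents appear later in this diff. Per the comment in that file, 32-bit ARM compilers route fabs(double) through VFP registers when simply clearing the IEEE-754 sign bit in integer registers would do. The function name fabs_bits below is illustrative; it models the same bit-twiddling portably via memcpy instead of the IEEEd2bits union.

#include <stdint.h>
#include <string.h>

static double fabs_bits(double x) {
  uint64_t bits;
  memcpy(&bits, &x, sizeof bits);   /* well-defined type pun */
  bits &= ~(1ULL << 63);            /* the sign is the top bit; NaN and -0.0 work too */
  memcpy(&x, &bits, sizeof bits);
  return x;
}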
# ----------------------------------------------------------------------------- @@ -282,9 +283,8 @@ LOCAL_SRC_FILES_arm += \ else LOCAL_SRC_FILES_arm += \ - arm/e_sqrt.S \ - arm/e_sqrtf.S \ - arm/s_floor.S \ + arm/sqrt.S \ + arm/floor.S \ endif @@ -481,8 +481,10 @@ LOCAL_C_INCLUDES_64 += $(LOCAL_PATH)/upstream-freebsd/lib/msun/ld128/ LOCAL_CLANG := $(libm_clang) LOCAL_ARM_MODE := arm LOCAL_CFLAGS := \ + -D__BIONIC_NO_MATH_INLINES \ -DFLT_EVAL_METHOD=0 \ -include $(LOCAL_PATH)/freebsd-compat.h \ + -Werror \ -Wno-missing-braces \ -Wno-parentheses \ -Wno-sign-compare \ diff --git a/libm/arm/e_sqrtf.S b/libm/arm/e_sqrtf.S deleted file mode 100644 index ddefb22..0000000 --- a/libm/arm/e_sqrtf.S +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. - * Johhnny Qiu <joqiu@nvidia.com> - * Shu Zhang <chazhang@nvidia.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of The Linux Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <private/bionic_asm.h> - -ENTRY(sqrtf) - vmov.f32 s0, r0 - vsqrt.f32 s0, s0 - vmov.f32 r0, s0 - bx lr -END(sqrtf) diff --git a/libm/arm/s_floor.S b/libm/arm/floor.S index 3af8f76..3af8f76 100644 --- a/libm/arm/s_floor.S +++ b/libm/arm/floor.S diff --git a/libm/arm/e_sqrt.S b/libm/arm/sqrt.S index 17312f5..f2981f4 100644 --- a/libm/arm/e_sqrt.S +++ b/libm/arm/sqrt.S @@ -39,4 +39,11 @@ ENTRY(sqrt) bx lr END(sqrt) +ENTRY(sqrtf) + vmov.f32 s0, r0 + vsqrt.f32 s0, s0 + vmov.f32 r0, s0 + bx lr +END(sqrtf) + ALIAS_SYMBOL(sqrtl, sqrt); diff --git a/libm/arm64/fenv.c b/libm/arm64/fenv.c index ce560a7..19a2393 100644 --- a/libm/arm64/fenv.c +++ b/libm/arm64/fenv.c @@ -26,6 +26,7 @@ * $FreeBSD: libm/aarch64/fenv.c $ */ +#include <stdint.h> #include <fenv.h> #define FPCR_EXCEPT_SHIFT 8 @@ -38,10 +39,20 @@ const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */}; typedef __uint32_t fpu_control_t; // FPCR, Floating-point Control Register. typedef __uint32_t fpu_status_t; // FPSR, Floating-point Status Register. 
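(Illustrative sketch, not part of the patch; AArch64 only.) The fenv.c change that continues below widens the temporaries used with mrs/msr. Those instructions always transfer a full 64-bit X register, so binding a 32-bit lvalue straight to the asm constraint can make the compiler pick a W register, which the assembler rejects for system-register moves; the architected FPCR/FPSR fields live in the low 32 bits, so narrowing afterwards is safe. The helper names read_fpcr and write_fpcr are illustrative.

#include <stdint.h>

static inline uint32_t read_fpcr(void) {
  uint64_t value;                                        /* 64-bit temporary for the X register */
  __asm__ __volatile__("mrs %0, fpcr" : "=r"(value));
  return (uint32_t)value;
}

static inline void write_fpcr(uint32_t fpcr) {
  uint64_t value = fpcr;                                 /* widen before handing it to msr */
  __asm__ __volatile__("msr fpcr, %0" : : "r"(value));
}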
-#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr)) -#define __get_fpsr(__fpsr) __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr)) -#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr)) -#define __set_fpsr(__fpsr) __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr)) +#define __get(REGISTER, __value) { \ + uint64_t __value64; \ + __asm__ __volatile__("mrs %0," REGISTER : "=r" (__value64)); \ + __value = (__uint32_t) __value64; \ +} +#define __get_fpcr(__fpcr) __get("fpcr", __fpcr) +#define __get_fpsr(__fpsr) __get("fpsr", __fpsr) + +#define __set(REGISTER, __value) { \ + uint64_t __value64 = __value; \ + __asm__ __volatile__("msr " REGISTER ",%0" : : "ri" (__value64)); \ +} +#define __set_fpcr(__fpcr) __set("fpcr", __fpcr) +#define __set_fpsr(__fpsr) __set("fpsr", __fpsr) int fegetenv(fenv_t* envp) { __get_fpcr(envp->__control); diff --git a/libm/fabs.cpp b/libm/fabs.cpp new file mode 100644 index 0000000..add73fe --- /dev/null +++ b/libm/fabs.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <math.h> + +#include "fpmath.h" + +double fabs(double x) { +#if __arm__ + // Both Clang and GCC insist on moving r0/r1 into a double register + // and using fabs where bit-twiddling would be a better choice. + // They get fabsf right, but we need to be careful in fabsl too. + IEEEd2bits u; + u.d = x; + u.bits.sign = 0; + return u.d; +#else + return __builtin_fabs(x); +#endif +} + +float fabsf(float x) { + return __builtin_fabsf(x); +} + +#if defined(__LP64__) +long double fabsl(long double x) { return __builtin_fabsl(x); } +#else +long double fabsl(long double x) { + // Don't use __builtin_fabs here because of ARM. (See fabs above.) 
+ return fabs(x); +} +#endif diff --git a/libm/fake_long_double.c b/libm/fake_long_double.c index 317a115..5edf839 100644 --- a/libm/fake_long_double.c +++ b/libm/fake_long_double.c @@ -25,7 +25,6 @@ */ long double copysignl(long double a1, long double a2) { return copysign(a1, a2); } -long double fabsl(long double a1) { return fabs(a1); } long double fmaxl(long double a1, long double a2) { return fmax(a1, a2); } long double fmodl(long double a1, long double a2) { return fmod(a1, a2); } long double fminl(long double a1, long double a2) { return fmin(a1, a2); } diff --git a/libm/include/math.h b/libm/include/math.h index 1542374..ce8e3b2 100644 --- a/libm/include/math.h +++ b/libm/include/math.h @@ -15,116 +15,70 @@ */ #ifndef _MATH_H_ -#define _MATH_H_ +#define _MATH_H_ #include <sys/cdefs.h> #include <limits.h> +#if !defined(__BIONIC_NO_MATH_INLINES) +#define __BIONIC_MATH_INLINE(__def) extern __inline__ __always_inline __attribute__((gnu_inline)) __attribute__((__artificial__)) __def +#else +#define __BIONIC_MATH_INLINE(__def) +#endif + __BEGIN_DECLS #pragma GCC visibility push(default) -/* - * ANSI/POSIX - */ -extern const union __infinity_un { - unsigned char __uc[8]; - double __ud; -} __infinity; - -extern const union __nan_un { - unsigned char __uc[sizeof(float)]; - float __uf; -} __nan; - -#if __GNUC_PREREQ(3, 3) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 800) -#define __MATH_BUILTIN_CONSTANTS -#endif +#define HUGE_VAL __builtin_huge_val() -#if __GNUC_PREREQ(3, 0) && !defined(__INTEL_COMPILER) -#define __MATH_BUILTIN_RELOPS -#endif +#if __ISO_C_VISIBLE >= 1999 +#define FP_ILOGB0 (-INT_MAX) +#define FP_ILOGBNAN INT_MAX -#ifdef __MATH_BUILTIN_CONSTANTS -#define HUGE_VAL __builtin_huge_val() -#else -#define HUGE_VAL (__infinity.__ud) -#endif +#define HUGE_VALF __builtin_huge_valf() +#define HUGE_VALL __builtin_huge_vall() +#define INFINITY __builtin_inff() +#define NAN __builtin_nanf("") -#if __ISO_C_VISIBLE >= 1999 -#define FP_ILOGB0 (-INT_MAX) /* Android-changed */ -#define FP_ILOGBNAN INT_MAX /* Android-changed */ - -#ifdef __MATH_BUILTIN_CONSTANTS -#define HUGE_VALF __builtin_huge_valf() -#define HUGE_VALL __builtin_huge_vall() -#define INFINITY __builtin_inff() -#define NAN __builtin_nanf("") -#else -#define HUGE_VALF (float)HUGE_VAL -#define HUGE_VALL (long double)HUGE_VAL -#define INFINITY HUGE_VALF -#define NAN (__nan.__uf) -#endif /* __MATH_BUILTIN_CONSTANTS */ - -#define MATH_ERRNO 1 -#define MATH_ERREXCEPT 2 -#define math_errhandling MATH_ERREXCEPT - -#define FP_FAST_FMAF 1 -#ifdef __ia64__ -#define FP_FAST_FMA 1 -#define FP_FAST_FMAL 1 +#define MATH_ERRNO 1 +#define MATH_ERREXCEPT 2 +#define math_errhandling MATH_ERREXCEPT + +#if defined(__FP_FAST_FMA) +#define FP_FAST_FMA 1 +#endif +#if defined(__FP_FAST_FMAF) +#define FP_FAST_FMAF 1 +#endif +#if defined(__FP_FAST_FMAL) +#define FP_FAST_FMAL 1 #endif /* Symbolic constants to classify floating point numbers. */ -#define FP_INFINITE 0x01 -#define FP_NAN 0x02 -#define FP_NORMAL 0x04 -#define FP_SUBNORMAL 0x08 -#define FP_ZERO 0x10 -#define fpclassify(x) \ - ((sizeof (x) == sizeof (float)) ? __fpclassifyf(x) \ - : (sizeof (x) == sizeof (double)) ? __fpclassifyd(x) \ - : __fpclassifyl(x)) - -#define isfinite(x) \ - ((sizeof (x) == sizeof (float)) ? __isfinitef(x) \ - : (sizeof (x) == sizeof (double)) ? __isfinite(x) \ - : __isfinitel(x)) -#define isinf(x) \ - ((sizeof (x) == sizeof (float)) ? __isinff(x) \ - : (sizeof (x) == sizeof (double)) ? 
isinf(x) \ - : __isinfl(x)) -#define isnan(x) \ - ((sizeof (x) == sizeof (float)) ? __isnanf(x) \ - : (sizeof (x) == sizeof (double)) ? isnan(x) \ - : __isnanl(x)) -#define isnormal(x) \ - ((sizeof (x) == sizeof (float)) ? __isnormalf(x) \ - : (sizeof (x) == sizeof (double)) ? __isnormal(x) \ - : __isnormall(x)) - -#ifdef __MATH_BUILTIN_RELOPS -#define isgreater(x, y) __builtin_isgreater((x), (y)) -#define isgreaterequal(x, y) __builtin_isgreaterequal((x), (y)) -#define isless(x, y) __builtin_isless((x), (y)) -#define islessequal(x, y) __builtin_islessequal((x), (y)) -#define islessgreater(x, y) __builtin_islessgreater((x), (y)) -#define isunordered(x, y) __builtin_isunordered((x), (y)) -#else -#define isgreater(x, y) (!isunordered((x), (y)) && (x) > (y)) -#define isgreaterequal(x, y) (!isunordered((x), (y)) && (x) >= (y)) -#define isless(x, y) (!isunordered((x), (y)) && (x) < (y)) -#define islessequal(x, y) (!isunordered((x), (y)) && (x) <= (y)) -#define islessgreater(x, y) (!isunordered((x), (y)) && \ - ((x) > (y) || (y) > (x))) -#define isunordered(x, y) (isnan(x) || isnan(y)) -#endif /* __MATH_BUILTIN_RELOPS */ - -#define signbit(x) \ - ((sizeof (x) == sizeof (float)) ? __signbitf(x) \ - : (sizeof (x) == sizeof (double)) ? __signbit(x) \ - : __signbitl(x)) +#define FP_INFINITE 0x01 +#define FP_NAN 0x02 +#define FP_NORMAL 0x04 +#define FP_SUBNORMAL 0x08 +#define FP_ZERO 0x10 +#define fpclassify(x) \ + __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) + +#define isfinite(x) __builtin_isfinite(x) +#define isinf(x) __builtin_isinf(x) +#define isnan(x) __builtin_isnan(x) +#define isnormal(x) __builtin_isnormal(x) + +#define isgreater(x, y) __builtin_isgreater((x), (y)) +#define isgreaterequal(x, y) __builtin_isgreaterequal((x), (y)) +#define isless(x, y) __builtin_isless((x), (y)) +#define islessequal(x, y) __builtin_islessequal((x), (y)) +#define islessgreater(x, y) __builtin_islessgreater((x), (y)) +#define isunordered(x, y) __builtin_isunordered((x), (y)) + +#define signbit(x) \ + ((sizeof(x) == sizeof(float)) ? __builtin_signbitf(x) \ + : (sizeof(x) == sizeof(double)) ? __builtin_signbit(x) \ + : __builtin_signbitl(x)) typedef double __double_t; typedef __double_t double_t; @@ -213,6 +167,7 @@ double sqrt(double); double ceil(double); double fabs(double) __pure2; +__BIONIC_MATH_INLINE(double fabs(double x) { return __builtin_fabs(x); }) double floor(double); double fmod(double, double); @@ -331,6 +286,7 @@ float sqrtf(float); float ceilf(float); float fabsf(float) __pure2; +__BIONIC_MATH_INLINE(float fabsf(float x) { return __builtin_fabsf(x); }) float floorf(float); float fmodf(float, float); float roundf(float); @@ -418,6 +374,7 @@ long double exp2l(long double); long double expl(long double); long double expm1l(long double); long double fabsl(long double) __pure2; +__BIONIC_MATH_INLINE(long double fabsl(long double x) { return __builtin_fabsl(x); }) long double fdiml(long double, long double); long double floorl(long double); long double fmal(long double, long double, long double); diff --git a/libm/upstream-freebsd/lib/msun/ld128/k_expl.h b/libm/upstream-freebsd/lib/msun/ld128/k_expl.h index a5668fd..e843d43 100644 --- a/libm/upstream-freebsd/lib/msun/ld128/k_expl.h +++ b/libm/upstream-freebsd/lib/msun/ld128/k_expl.h @@ -29,7 +29,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/ld128/k_expl.h 275819 2014-12-16 09:21:56Z ed $"); /* * ld128 version of k_expl.h. See ../ld80/s_expl.c for most comments. 
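(Illustrative check, not part of the patch.) The math.h rewrite above replaces the sizeof-based dispatch to __fpclassifyf()/__isinff()-style helpers with compiler builtins: __builtin_fpclassify takes the five FP_* labels followed by the argument, and the isinf/isnan/isfinite/isnormal builtins are type-generic, so one definition covers float, double and long double. A small standalone program exercising the resulting macros:

#include <assert.h>
#include <math.h>

int main(void) {
  assert(fpclassify(0.0) == FP_ZERO);
  assert(fpclassify(1.0f) == FP_NORMAL);
  assert(isinf(HUGE_VAL) && !isinf(1.0));      /* HUGE_VAL is __builtin_huge_val() */
  assert(isnan(nan("")) && !isnan(0.0L));
  assert(signbit(-0.0) && !signbit(0.0));      /* signbit still dispatches on sizeof, now to builtins */
  return 0;
}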
@@ -322,7 +322,7 @@ __ldexp_cexpl(long double complex z, int expt) scale2 = 1; SET_LDBL_EXPSIGN(scale1, BIAS + expt - half_expt); - return (cpackl(cos(y) * exp_x * scale1 * scale2, + return (CMPLXL(cos(y) * exp_x * scale1 * scale2, sinl(y) * exp_x * scale1 * scale2)); } #endif /* _COMPLEX_H */ diff --git a/libm/upstream-freebsd/lib/msun/src/catrig.c b/libm/upstream-freebsd/lib/msun/src/catrig.c index 200977c..050a88b 100644 --- a/libm/upstream-freebsd/lib/msun/src/catrig.c +++ b/libm/upstream-freebsd/lib/msun/src/catrig.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/catrig.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <float.h> @@ -286,19 +286,19 @@ casinh(double complex z) if (isnan(x) || isnan(y)) { /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ if (isinf(x)) - return (cpack(x, y + y)); + return (CMPLX(x, y + y)); /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ if (isinf(y)) - return (cpack(y, x + x)); + return (CMPLX(y, x + x)); /* casinh(NaN + I*0) = NaN + I*0 */ if (y == 0) - return (cpack(x + x, y)); + return (CMPLX(x + x, y)); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ - return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { @@ -307,7 +307,7 @@ casinh(double complex z) w = clog_for_large_values(z) + m_ln2; else w = clog_for_large_values(-z) + m_ln2; - return (cpack(copysign(creal(w), x), copysign(cimag(w), y))); + return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y))); } /* Avoid spuriously raising inexact for z = 0. */ @@ -325,7 +325,7 @@ casinh(double complex z) ry = asin(B); else ry = atan2(new_y, sqrt_A2my2); - return (cpack(copysign(rx, x), copysign(ry, y))); + return (CMPLX(copysign(rx, x), copysign(ry, y))); } /* @@ -335,9 +335,9 @@ casinh(double complex z) double complex casin(double complex z) { - double complex w = casinh(cpack(cimag(z), creal(z))); + double complex w = casinh(CMPLX(cimag(z), creal(z))); - return (cpack(cimag(w), creal(w))); + return (CMPLX(cimag(w), creal(w))); } /* @@ -370,19 +370,19 @@ cacos(double complex z) if (isnan(x) || isnan(y)) { /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ if (isinf(x)) - return (cpack(y + y, -INFINITY)); + return (CMPLX(y + y, -INFINITY)); /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ if (isinf(y)) - return (cpack(x + x, -y)); + return (CMPLX(x + x, -y)); /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ if (x == 0) - return (cpack(pio2_hi + pio2_lo, y + y)); + return (CMPLX(pio2_hi + pio2_lo, y + y)); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ - return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { @@ -392,18 +392,18 @@ cacos(double complex z) ry = creal(w) + m_ln2; if (sy == 0) ry = -ry; - return (cpack(rx, ry)); + return (CMPLX(rx, ry)); } /* Avoid spuriously raising inexact for z = 1. */ if (x == 1 && y == 0) - return (cpack(0, -y)); + return (CMPLX(0, -y)); /* All remaining cases are inexact. 
*/ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) - return (cpack(pio2_hi - (x - pio2_lo), -y)); + return (CMPLX(pio2_hi - (x - pio2_lo), -y)); do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); if (B_is_usable) { @@ -419,7 +419,7 @@ cacos(double complex z) } if (sy == 0) ry = -ry; - return (cpack(rx, ry)); + return (CMPLX(rx, ry)); } /* @@ -437,15 +437,15 @@ cacosh(double complex z) ry = cimag(w); /* cacosh(NaN + I*NaN) = NaN + I*NaN */ if (isnan(rx) && isnan(ry)) - return (cpack(ry, rx)); + return (CMPLX(ry, rx)); /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ if (isnan(rx)) - return (cpack(fabs(ry), rx)); + return (CMPLX(fabs(ry), rx)); /* cacosh(0 + I*NaN) = NaN + I*NaN */ if (isnan(ry)) - return (cpack(ry, ry)); - return (cpack(fabs(ry), copysign(rx, cimag(z)))); + return (CMPLX(ry, ry)); + return (CMPLX(fabs(ry), copysign(rx, cimag(z)))); } /* @@ -475,16 +475,16 @@ clog_for_large_values(double complex z) * this method is still poor since it is uneccessarily slow. */ if (ax > DBL_MAX / 2) - return (cpack(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); + return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); /* * Avoid overflow when x or y is large. Avoid underflow when x or * y is small. */ if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) - return (cpack(log(hypot(x, y)), atan2(y, x))); + return (CMPLX(log(hypot(x, y)), atan2(y, x))); - return (cpack(log(ax * ax + ay * ay) / 2, atan2(y, x))); + return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x))); } /* @@ -575,30 +575,30 @@ catanh(double complex z) /* This helps handle many cases. */ if (y == 0 && ax <= 1) - return (cpack(atanh(x), y)); + return (CMPLX(atanh(x), y)); /* To ensure the same accuracy as atan(), and to filter out z = 0. */ if (x == 0) - return (cpack(x, atan(y))); + return (CMPLX(x, atan(y))); if (isnan(x) || isnan(y)) { /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (isinf(x)) - return (cpack(copysign(0, x), y + y)); + return (CMPLX(copysign(0, x), y + y)); /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ if (isinf(y)) - return (cpack(copysign(0, x), + return (CMPLX(copysign(0, x), copysign(pio2_hi + pio2_lo, y))); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. 
*/ - return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) - return (cpack(real_part_reciprocal(x, y), + return (CMPLX(real_part_reciprocal(x, y), copysign(pio2_hi + pio2_lo, y))); if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { @@ -623,7 +623,7 @@ catanh(double complex z) else ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; - return (cpack(copysign(rx, x), copysign(ry, y))); + return (CMPLX(copysign(rx, x), copysign(ry, y))); } /* @@ -633,7 +633,7 @@ catanh(double complex z) double complex catan(double complex z) { - double complex w = catanh(cpack(cimag(z), creal(z))); + double complex w = catanh(CMPLX(cimag(z), creal(z))); - return (cpack(cimag(w), creal(w))); + return (CMPLX(cimag(w), creal(w))); } diff --git a/libm/upstream-freebsd/lib/msun/src/catrigf.c b/libm/upstream-freebsd/lib/msun/src/catrigf.c index 08ebef7..e057d31 100644 --- a/libm/upstream-freebsd/lib/msun/src/catrigf.c +++ b/libm/upstream-freebsd/lib/msun/src/catrigf.c @@ -39,7 +39,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/catrigf.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <float.h> @@ -156,12 +156,12 @@ casinhf(float complex z) if (isnan(x) || isnan(y)) { if (isinf(x)) - return (cpackf(x, y + y)); + return (CMPLXF(x, y + y)); if (isinf(y)) - return (cpackf(y, x + x)); + return (CMPLXF(y, x + x)); if (y == 0) - return (cpackf(x + x, y)); - return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLXF(x + x, y)); + return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { @@ -169,7 +169,7 @@ casinhf(float complex z) w = clog_for_large_values(z) + m_ln2; else w = clog_for_large_values(-z) + m_ln2; - return (cpackf(copysignf(crealf(w), x), + return (CMPLXF(copysignf(crealf(w), x), copysignf(cimagf(w), y))); } @@ -186,15 +186,15 @@ casinhf(float complex z) ry = asinf(B); else ry = atan2f(new_y, sqrt_A2my2); - return (cpackf(copysignf(rx, x), copysignf(ry, y))); + return (CMPLXF(copysignf(rx, x), copysignf(ry, y))); } float complex casinf(float complex z) { - float complex w = casinhf(cpackf(cimagf(z), crealf(z))); + float complex w = casinhf(CMPLXF(cimagf(z), crealf(z))); - return (cpackf(cimagf(w), crealf(w))); + return (CMPLXF(cimagf(w), crealf(w))); } float complex @@ -214,12 +214,12 @@ cacosf(float complex z) if (isnan(x) || isnan(y)) { if (isinf(x)) - return (cpackf(y + y, -INFINITY)); + return (CMPLXF(y + y, -INFINITY)); if (isinf(y)) - return (cpackf(x + x, -y)); + return (CMPLXF(x + x, -y)); if (x == 0) - return (cpackf(pio2_hi + pio2_lo, y + y)); - return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLXF(pio2_hi + pio2_lo, y + y)); + return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { @@ -228,16 +228,16 @@ cacosf(float complex z) ry = crealf(w) + m_ln2; if (sy == 0) ry = -ry; - return (cpackf(rx, ry)); + return (CMPLXF(rx, ry)); } if (x == 1 && y == 0) - return (cpackf(0, -y)); + return (CMPLXF(0, -y)); raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) - return (cpackf(pio2_hi - (x - pio2_lo), -y)); + return (CMPLXF(pio2_hi - (x - pio2_lo), -y)); do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); if (B_is_usable) { @@ -253,7 +253,7 @@ cacosf(float complex z) } if (sy == 0) ry = -ry; - return (cpackf(rx, ry)); + return (CMPLXF(rx, ry)); } float complex 
@@ -266,12 +266,12 @@ cacoshf(float complex z) rx = crealf(w); ry = cimagf(w); if (isnan(rx) && isnan(ry)) - return (cpackf(ry, rx)); + return (CMPLXF(ry, rx)); if (isnan(rx)) - return (cpackf(fabsf(ry), rx)); + return (CMPLXF(fabsf(ry), rx)); if (isnan(ry)) - return (cpackf(ry, ry)); - return (cpackf(fabsf(ry), copysignf(rx, cimagf(z)))); + return (CMPLXF(ry, ry)); + return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z)))); } static float complex @@ -291,13 +291,13 @@ clog_for_large_values(float complex z) } if (ax > FLT_MAX / 2) - return (cpackf(logf(hypotf(x / m_e, y / m_e)) + 1, + return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1, atan2f(y, x))); if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) - return (cpackf(logf(hypotf(x, y)), atan2f(y, x))); + return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x))); - return (cpackf(logf(ax * ax + ay * ay) / 2, atan2f(y, x))); + return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x))); } static inline float @@ -346,22 +346,22 @@ catanhf(float complex z) ay = fabsf(y); if (y == 0 && ax <= 1) - return (cpackf(atanhf(x), y)); + return (CMPLXF(atanhf(x), y)); if (x == 0) - return (cpackf(x, atanf(y))); + return (CMPLXF(x, atanf(y))); if (isnan(x) || isnan(y)) { if (isinf(x)) - return (cpackf(copysignf(0, x), y + y)); + return (CMPLXF(copysignf(0, x), y + y)); if (isinf(y)) - return (cpackf(copysignf(0, x), + return (CMPLXF(copysignf(0, x), copysignf(pio2_hi + pio2_lo, y))); - return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + return (CMPLXF(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) - return (cpackf(real_part_reciprocal(x, y), + return (CMPLXF(real_part_reciprocal(x, y), copysignf(pio2_hi + pio2_lo, y))); if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { @@ -381,13 +381,13 @@ catanhf(float complex z) else ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; - return (cpackf(copysignf(rx, x), copysignf(ry, y))); + return (CMPLXF(copysignf(rx, x), copysignf(ry, y))); } float complex catanf(float complex z) { - float complex w = catanhf(cpackf(cimagf(z), crealf(z))); + float complex w = catanhf(CMPLXF(cimagf(z), crealf(z))); - return (cpackf(cimagf(w), crealf(w))); + return (CMPLXF(cimagf(w), crealf(w))); } diff --git a/libm/upstream-freebsd/lib/msun/src/e_j0.c b/libm/upstream-freebsd/lib/msun/src/e_j0.c index 8320f25..36e72c2 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_j0.c +++ b/libm/upstream-freebsd/lib/msun/src/e_j0.c @@ -12,7 +12,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_j0.c 283032 2015-05-17 16:27:06Z kargl $"); /* __ieee754_j0(x), __ieee754_y0(x) * Bessel function of the first and second kinds of order zero. @@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$"); #include "math.h" #include "math_private.h" -static double pzero(double), qzero(double); +static __inline double pzero(double), qzero(double); + +static const volatile double vone = 1, vzero = 0; static const double huge = 1e300, @@ -115,7 +117,7 @@ __ieee754_j0(double x) if(ix<0x3f200000) { /* |x| < 2**-13 */ if(huge+x>one) { /* raise inexact if x != 0 */ if(ix<0x3e400000) return one; /* |x|<2**-27 */ - else return one - 0.25*x*x; + else return one - x*x/4; } } z = x*x; @@ -150,10 +152,16 @@ __ieee754_y0(double x) EXTRACT_WORDS(hx,lx,x); ix = 0x7fffffff&hx; - /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */ - if(ix>=0x7ff00000) return one/(x+x*x); - if((ix|lx)==0) return -one/zero; - if(hx<0) return zero/zero; + /* + * y0(NaN) = NaN. + * y0(Inf) = 0. 
+ * y0(-Inf) = NaN and raise invalid exception. + */ + if(ix>=0x7ff00000) return vone/(x+x*x); + /* y0(+-0) = -inf and raise divide-by-zero exception. */ + if((ix|lx)==0) return -one/vzero; + /* y0(x<0) = NaN and raise invalid exception. */ + if(hx<0) return vzero/vzero; if(ix >= 0x40000000) { /* |x| >= 2.0 */ /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) * where x0 = x-pi/4 @@ -268,7 +276,8 @@ static const double pS2[5] = { 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ }; - static double pzero(double x) +static __inline double +pzero(double x) { const double *p,*q; double z,r,s; @@ -278,7 +287,7 @@ static const double pS2[5] = { if(ix>=0x40200000) {p = pR8; q= pS8;} else if(ix>=0x40122E8B){p = pR5; q= pS5;} else if(ix>=0x4006DB6D){p = pR3; q= pS3;} - else if(ix>=0x40000000){p = pR2; q= pS2;} + else {p = pR2; q= pS2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); @@ -363,7 +372,8 @@ static const double qS2[6] = { -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ }; - static double qzero(double x) +static __inline double +qzero(double x) { const double *p,*q; double s,r,z; @@ -373,7 +383,7 @@ static const double qS2[6] = { if(ix>=0x40200000) {p = qR8; q= qS8;} else if(ix>=0x40122E8B){p = qR5; q= qS5;} else if(ix>=0x4006DB6D){p = qR3; q= qS3;} - else if(ix>=0x40000000){p = qR2; q= qS2;} + else {p = qR2; q= qS2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); diff --git a/libm/upstream-freebsd/lib/msun/src/e_j0f.c b/libm/upstream-freebsd/lib/msun/src/e_j0f.c index c45faf3..e53b218 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_j0f.c +++ b/libm/upstream-freebsd/lib/msun/src/e_j0f.c @@ -14,12 +14,18 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_j0f.c 283032 2015-05-17 16:27:06Z kargl $"); + +/* + * See e_j0.c for complete comments. 
+ */ #include "math.h" #include "math_private.h" -static float pzerof(float), qzerof(float); +static __inline float pzerof(float), qzerof(float); + +static const volatile float vone = 1, vzero = 0; static const float huge = 1e30, @@ -62,17 +68,17 @@ __ieee754_j0f(float x) * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) */ - if(ix>0x80000000) z = (invsqrtpi*cc)/sqrtf(x); + if(ix>0x58000000) z = (invsqrtpi*cc)/sqrtf(x); /* |x|>2**49 */ else { u = pzerof(x); v = qzerof(x); z = invsqrtpi*(u*cc-v*ss)/sqrtf(x); } return z; } - if(ix<0x39000000) { /* |x| < 2**-13 */ + if(ix<0x3b000000) { /* |x| < 2**-9 */ if(huge+x>one) { /* raise inexact if x != 0 */ - if(ix<0x32000000) return one; /* |x|<2**-27 */ - else return one - (float)0.25*x*x; + if(ix<0x39800000) return one; /* |x|<2**-12 */ + else return one - x*x/4; } } z = x*x; @@ -107,10 +113,9 @@ __ieee754_y0f(float x) GET_FLOAT_WORD(hx,x); ix = 0x7fffffff&hx; - /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */ - if(ix>=0x7f800000) return one/(x+x*x); - if(ix==0) return -one/zero; - if(hx<0) return zero/zero; + if(ix>=0x7f800000) return vone/(x+x*x); + if(ix==0) return -one/vzero; + if(hx<0) return vzero/vzero; if(ix >= 0x40000000) { /* |x| >= 2.0 */ /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) * where x0 = x-pi/4 @@ -136,14 +141,14 @@ __ieee754_y0f(float x) if ((s*c)<zero) cc = z/ss; else ss = z/cc; } - if(ix>0x80000000) z = (invsqrtpi*ss)/sqrtf(x); + if(ix>0x58000000) z = (invsqrtpi*ss)/sqrtf(x); /* |x|>2**49 */ else { u = pzerof(x); v = qzerof(x); z = invsqrtpi*(u*ss+v*cc)/sqrtf(x); } return z; } - if(ix<=0x32000000) { /* x < 2**-27 */ + if(ix<=0x39000000) { /* x < 2**-13 */ return(u00 + tpi*__ieee754_logf(x)); } z = x*x; @@ -224,7 +229,8 @@ static const float pS2[5] = { 1.4657617569e+01, /* 0x416a859a */ }; - static float pzerof(float x) +static __inline float +pzerof(float x) { const float *p,*q; float z,r,s; @@ -232,9 +238,9 @@ static const float pS2[5] = { GET_FLOAT_WORD(ix,x); ix &= 0x7fffffff; if(ix>=0x41000000) {p = pR8; q= pS8;} - else if(ix>=0x40f71c58){p = pR5; q= pS5;} - else if(ix>=0x4036db68){p = pR3; q= pS3;} - else if(ix>=0x40000000){p = pR2; q= pS2;} + else if(ix>=0x409173eb){p = pR5; q= pS5;} + else if(ix>=0x4036d917){p = pR3; q= pS3;} + else {p = pR2; q= pS2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); @@ -319,7 +325,8 @@ static const float qS2[6] = { -5.3109550476e+00, /* 0xc0a9f358 */ }; - static float qzerof(float x) +static __inline float +qzerof(float x) { const float *p,*q; float s,r,z; @@ -327,9 +334,9 @@ static const float qS2[6] = { GET_FLOAT_WORD(ix,x); ix &= 0x7fffffff; if(ix>=0x41000000) {p = qR8; q= qS8;} - else if(ix>=0x40f71c58){p = qR5; q= qS5;} - else if(ix>=0x4036db68){p = qR3; q= qS3;} - else if(ix>=0x40000000){p = qR2; q= qS2;} + else if(ix>=0x409173eb){p = qR5; q= qS5;} + else if(ix>=0x4036d917){p = qR3; q= qS3;} + else {p = qR2; q= qS2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); diff --git a/libm/upstream-freebsd/lib/msun/src/e_j1.c b/libm/upstream-freebsd/lib/msun/src/e_j1.c index 63800ad..b11ac2d 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_j1.c +++ b/libm/upstream-freebsd/lib/msun/src/e_j1.c @@ -12,7 +12,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_j1.c 283032 
2015-05-17 16:27:06Z kargl $"); /* __ieee754_j1(x), __ieee754_y1(x) * Bessel function of the first and second kinds of order zero. @@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$"); #include "math.h" #include "math_private.h" -static double pone(double), qone(double); +static __inline double pone(double), qone(double); + +static const volatile double vone = 1, vzero = 0; static const double huge = 1e300, @@ -147,10 +149,16 @@ __ieee754_y1(double x) EXTRACT_WORDS(hx,lx,x); ix = 0x7fffffff&hx; - /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ - if(ix>=0x7ff00000) return one/(x+x*x); - if((ix|lx)==0) return -one/zero; - if(hx<0) return zero/zero; + /* + * y1(NaN) = NaN. + * y1(Inf) = 0. + * y1(-Inf) = NaN and raise invalid exception. + */ + if(ix>=0x7ff00000) return vone/(x+x*x); + /* y1(+-0) = -inf and raise divide-by-zero exception. */ + if((ix|lx)==0) return -one/vzero; + /* y1(x<0) = NaN and raise invalid exception. */ + if(hx<0) return vzero/vzero; if(ix >= 0x40000000) { /* |x| >= 2.0 */ s = sin(x); c = cos(x); @@ -262,7 +270,8 @@ static const double ps2[5] = { 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ }; - static double pone(double x) +static __inline double +pone(double x) { const double *p,*q; double z,r,s; @@ -272,7 +281,7 @@ static const double ps2[5] = { if(ix>=0x40200000) {p = pr8; q= ps8;} else if(ix>=0x40122E8B){p = pr5; q= ps5;} else if(ix>=0x4006DB6D){p = pr3; q= ps3;} - else if(ix>=0x40000000){p = pr2; q= ps2;} + else {p = pr2; q= ps2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); @@ -358,7 +367,8 @@ static const double qs2[6] = { -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ }; - static double qone(double x) +static __inline double +qone(double x) { const double *p,*q; double s,r,z; @@ -368,7 +378,7 @@ static const double qs2[6] = { if(ix>=0x40200000) {p = qr8; q= qs8;} else if(ix>=0x40122E8B){p = qr5; q= qs5;} else if(ix>=0x4006DB6D){p = qr3; q= qs3;} - else if(ix>=0x40000000){p = qr2; q= qs2;} + else {p = qr2; q= qs2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); diff --git a/libm/upstream-freebsd/lib/msun/src/e_j1f.c b/libm/upstream-freebsd/lib/msun/src/e_j1f.c index 88e2d83..0cca823 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_j1f.c +++ b/libm/upstream-freebsd/lib/msun/src/e_j1f.c @@ -14,12 +14,18 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_j1f.c 283032 2015-05-17 16:27:06Z kargl $"); + +/* + * See e_j1.c for complete comments. 
+ */ #include "math.h" #include "math_private.h" -static float ponef(float), qonef(float); +static __inline float ponef(float), qonef(float); + +static const volatile float vone = 1, vzero = 0; static const float huge = 1e30, @@ -63,7 +69,7 @@ __ieee754_j1f(float x) * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x) * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x) */ - if(ix>0x80000000) z = (invsqrtpi*cc)/sqrtf(y); + if(ix>0x58000000) z = (invsqrtpi*cc)/sqrtf(y); /* |x|>2**49 */ else { u = ponef(y); v = qonef(y); z = invsqrtpi*(u*cc-v*ss)/sqrtf(y); @@ -71,7 +77,7 @@ __ieee754_j1f(float x) if(hx<0) return -z; else return z; } - if(ix<0x32000000) { /* |x|<2**-27 */ + if(ix<0x39000000) { /* |x|<2**-13 */ if(huge+x>one) return (float)0.5*x;/* inexact if x!=0 necessary */ } z = x*x; @@ -104,10 +110,9 @@ __ieee754_y1f(float x) GET_FLOAT_WORD(hx,x); ix = 0x7fffffff&hx; - /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ - if(ix>=0x7f800000) return one/(x+x*x); - if(ix==0) return -one/zero; - if(hx<0) return zero/zero; + if(ix>=0x7f800000) return vone/(x+x*x); + if(ix==0) return -one/vzero; + if(hx<0) return vzero/vzero; if(ix >= 0x40000000) { /* |x| >= 2.0 */ s = sinf(x); c = cosf(x); @@ -129,14 +134,14 @@ __ieee754_y1f(float x) * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) * to compute the worse one. */ - if(ix>0x48000000) z = (invsqrtpi*ss)/sqrtf(x); + if(ix>0x58000000) z = (invsqrtpi*ss)/sqrtf(x); /* |x|>2**49 */ else { u = ponef(x); v = qonef(x); z = invsqrtpi*(u*ss+v*cc)/sqrtf(x); } return z; } - if(ix<=0x24800000) { /* x < 2**-54 */ + if(ix<=0x33000000) { /* x < 2**-25 */ return(-tpi/x); } z = x*x; @@ -219,7 +224,8 @@ static const float ps2[5] = { 8.3646392822e+00, /* 0x4105d590 */ }; - static float ponef(float x) +static __inline float +ponef(float x) { const float *p,*q; float z,r,s; @@ -227,9 +233,9 @@ static const float ps2[5] = { GET_FLOAT_WORD(ix,x); ix &= 0x7fffffff; if(ix>=0x41000000) {p = pr8; q= ps8;} - else if(ix>=0x40f71c58){p = pr5; q= ps5;} - else if(ix>=0x4036db68){p = pr3; q= ps3;} - else if(ix>=0x40000000){p = pr2; q= ps2;} + else if(ix>=0x409173eb){p = pr5; q= ps5;} + else if(ix>=0x4036d917){p = pr3; q= ps3;} + else {p = pr2; q= ps2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); @@ -315,17 +321,18 @@ static const float qs2[6] = { -4.9594988823e+00, /* 0xc09eb437 */ }; - static float qonef(float x) +static __inline float +qonef(float x) { const float *p,*q; float s,r,z; int32_t ix; GET_FLOAT_WORD(ix,x); ix &= 0x7fffffff; - if(ix>=0x40200000) {p = qr8; q= qs8;} - else if(ix>=0x40f71c58){p = qr5; q= qs5;} - else if(ix>=0x4036db68){p = qr3; q= qs3;} - else if(ix>=0x40000000){p = qr2; q= qs2;} + if(ix>=0x41000000) {p = qr8; q= qs8;} + else if(ix>=0x409173eb){p = qr5; q= qs5;} + else if(ix>=0x4036d917){p = qr3; q= qs3;} + else {p = qr2; q= qs2;} /* ix>=0x40000000 */ z = one/(x*x); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); diff --git a/libm/upstream-freebsd/lib/msun/src/e_jn.c b/libm/upstream-freebsd/lib/msun/src/e_jn.c index 8b0bc62..a1130c5 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_jn.c +++ b/libm/upstream-freebsd/lib/msun/src/e_jn.c @@ -12,7 +12,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_jn.c 279856 2015-03-10 17:10:54Z kargl $"); /* * __ieee754_jn(n, x), __ieee754_yn(n, x) @@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$"); #include 
"math.h" #include "math_private.h" +static const volatile double vone = 1, vzero = 0; + static const double invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */ @@ -220,10 +222,12 @@ __ieee754_yn(int n, double x) EXTRACT_WORDS(hx,lx,x); ix = 0x7fffffff&hx; - /* if Y(n,NaN) is NaN */ + /* yn(n,NaN) = NaN */ if((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000) return x+x; - if((ix|lx)==0) return -one/zero; - if(hx<0) return zero/zero; + /* yn(n,+-0) = -inf and raise divide-by-zero exception. */ + if((ix|lx)==0) return -one/vzero; + /* yn(n,x<0) = NaN and raise invalid exception. */ + if(hx<0) return vzero/vzero; sign = 1; if(n<0){ n = -n; diff --git a/libm/upstream-freebsd/lib/msun/src/e_jnf.c b/libm/upstream-freebsd/lib/msun/src/e_jnf.c index f564aec..c82d5cf 100644 --- a/libm/upstream-freebsd/lib/msun/src/e_jnf.c +++ b/libm/upstream-freebsd/lib/msun/src/e_jnf.c @@ -14,11 +14,17 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/e_jnf.c 279856 2015-03-10 17:10:54Z kargl $"); + +/* + * See e_jn.c for complete comments. + */ #include "math.h" #include "math_private.h" +static const volatile float vone = 1, vzero = 0; + static const float two = 2.0000000000e+00, /* 0x40000000 */ one = 1.0000000000e+00; /* 0x3F800000 */ @@ -172,10 +178,9 @@ __ieee754_ynf(int n, float x) GET_FLOAT_WORD(hx,x); ix = 0x7fffffff&hx; - /* if Y(n,NaN) is NaN */ if(ix>0x7f800000) return x+x; - if(ix==0) return -one/zero; - if(hx<0) return zero/zero; + if(ix==0) return -one/vzero; + if(hx<0) return vzero/vzero; sign = 1; if(n<0){ n = -n; diff --git a/libm/upstream-freebsd/lib/msun/src/k_exp.c b/libm/upstream-freebsd/lib/msun/src/k_exp.c index f592f69..5aa3ef3 100644 --- a/libm/upstream-freebsd/lib/msun/src/k_exp.c +++ b/libm/upstream-freebsd/lib/msun/src/k_exp.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/k_exp.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> @@ -103,6 +103,6 @@ __ldexp_cexp(double complex z, int expt) half_expt = expt - half_expt; INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0); - return (cpack(cos(y) * exp_x * scale1 * scale2, + return (CMPLX(cos(y) * exp_x * scale1 * scale2, sin(y) * exp_x * scale1 * scale2)); } diff --git a/libm/upstream-freebsd/lib/msun/src/k_expf.c b/libm/upstream-freebsd/lib/msun/src/k_expf.c index 548a008..8fe8c46 100644 --- a/libm/upstream-freebsd/lib/msun/src/k_expf.c +++ b/libm/upstream-freebsd/lib/msun/src/k_expf.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/k_expf.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> @@ -82,6 +82,6 @@ __ldexp_cexpf(float complex z, int expt) half_expt = expt - half_expt; SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23); - return (cpackf(cosf(y) * exp_x * scale1 * scale2, + return (CMPLXF(cosf(y) * exp_x * scale1 * scale2, sinf(y) * exp_x * scale1 * scale2)); } diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h index 8af2c65..1f10e8b 100644 --- a/libm/upstream-freebsd/lib/msun/src/math_private.h +++ b/libm/upstream-freebsd/lib/msun/src/math_private.h @@ -11,7 +11,7 @@ /* * from: @(#)fdlibm.h 5.1 93/09/24 - * $FreeBSD$ + * $FreeBSD: head/lib/msun/src/math_private.h 276176 2014-12-24 10:13:53Z ed $ */ #ifndef _MATH_PRIVATE_H_ @@ -454,9 +454,15 @@ typedef union { * (0.0+I)*(y+0.0*I) and laboriously computing the full complex 
product. * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted * to -0.0+I*0.0. + * + * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() + * to construct complex values. Compilers that conform to the C99 + * standard require the following functions to avoid the above issues. */ + +#ifndef CMPLXF static __inline float complex -cpackf(float x, float y) +CMPLXF(float x, float y) { float_complex z; @@ -464,9 +470,11 @@ cpackf(float x, float y) IMAGPART(z) = y; return (z.f); } +#endif +#ifndef CMPLX static __inline double complex -cpack(double x, double y) +CMPLX(double x, double y) { double_complex z; @@ -474,9 +482,11 @@ cpack(double x, double y) IMAGPART(z) = y; return (z.f); } +#endif +#ifndef CMPLXL static __inline long double complex -cpackl(long double x, long double y) +CMPLXL(long double x, long double y) { long_double_complex z; @@ -484,6 +494,8 @@ cpackl(long double x, long double y) IMAGPART(z) = y; return (z.f); } +#endif + #endif /* _COMPLEX_H */ #ifdef __GNUCLIKE_ASM diff --git a/libm/upstream-freebsd/lib/msun/src/s_ccosh.c b/libm/upstream-freebsd/lib/msun/src/s_ccosh.c index 9ea962b..e544e91 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_ccosh.c +++ b/libm/upstream-freebsd/lib/msun/src/s_ccosh.c @@ -32,10 +32,12 @@ * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. + * The sign of the result for some exceptional values is unspecified but + * must satisfy both cosh(conj(z)) == conj(cosh(z)) and cosh(-z) == cosh(z). */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_ccosh.c 284423 2015-06-15 20:11:06Z tijl $"); #include <complex.h> #include <math.h> @@ -62,49 +64,48 @@ ccosh(double complex z) /* Handle the nearly-non-exceptional cases where x and y are finite. */ if (ix < 0x7ff00000 && iy < 0x7ff00000) { if ((iy | ly) == 0) - return (cpack(cosh(x), x * y)); - if (ix < 0x40360000) /* small x: normal case */ - return (cpack(cosh(x) * cos(y), sinh(x) * sin(y))); + return (CMPLX(cosh(x), x * y)); + if (ix < 0x40360000) /* |x| < 22: normal case */ + return (CMPLX(cosh(x) * cos(y), sinh(x) * sin(y))); /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (ix < 0x40862e42) { /* x < 710: exp(|x|) won't overflow */ h = exp(fabs(x)) * 0.5; - return (cpack(h * cos(y), copysign(h, x) * sin(y))); + return (CMPLX(h * cos(y), copysign(h, x) * sin(y))); } else if (ix < 0x4096bbaa) { /* x < 1455: scale to avoid overflow */ - z = __ldexp_cexp(cpack(fabs(x), y), -1); - return (cpack(creal(z), cimag(z) * copysign(1, x))); + z = __ldexp_cexp(CMPLX(fabs(x), y), -1); + return (CMPLX(creal(z), cimag(z) * copysign(1, x))); } else { /* x >= 1455: the result always overflows */ h = huge * x; - return (cpack(h * h * cos(y), h * sin(y))); + return (CMPLX(h * h * cos(y), h * sin(y))); } } /* - * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. - * The sign of 0 in the result is unspecified. Choice = normally - * the same as dNaN. Raise the invalid floating-point exception. + * cosh(+-0 +- I Inf) = dNaN + I (+-)(+-)0. + * The sign of 0 in the result is unspecified. Choice = product + * of the signs of the argument. Raise the invalid floating-point + * exception. * - * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. - * The sign of 0 in the result is unspecified. Choice = normally - * the same as d(NaN). + * cosh(+-0 +- I NaN) = d(NaN) + I (+-)(+-)0. + * The sign of 0 in the result is unspecified. Choice = product + * of the signs of the argument. 
*/ - if ((ix | lx) == 0 && iy >= 0x7ff00000) - return (cpack(y - y, copysign(0, x * (y - y)))); + if ((ix | lx) == 0) /* && iy >= 0x7ff00000 */ + return (CMPLX(y - y, x * copysign(0, y))); /* * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. * - * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. - * The sign of 0 in the result is unspecified. + * cosh(NaN +- I 0) = d(NaN) + I (+-)(+-)0. + * The sign of 0 in the result is unspecified. Choice = product + * of the signs of the argument. */ - if ((iy | ly) == 0 && ix >= 0x7ff00000) { - if (((hx & 0xfffff) | lx) == 0) - return (cpack(x * x, copysign(0, x) * y)); - return (cpack(x * x, copysign(0, (x + x) * y))); - } + if ((iy | ly) == 0) /* && ix >= 0x7ff00000 */ + return (CMPLX(x * x, copysign(0, x) * y)); /* * cosh(x +- I Inf) = dNaN + I dNaN. @@ -114,8 +115,8 @@ ccosh(double complex z) * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ - if (ix < 0x7ff00000 && iy >= 0x7ff00000) - return (cpack(y - y, x * (y - y))); + if (ix < 0x7ff00000) /* && iy >= 0x7ff00000 */ + return (CMPLX(y - y, x * (y - y))); /* * cosh(+-Inf + I NaN) = +Inf + I d(NaN). @@ -126,10 +127,10 @@ ccosh(double complex z) * * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) */ - if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (ix == 0x7ff00000 && lx == 0) { if (iy >= 0x7ff00000) - return (cpack(x * x, x * (y - y))); - return (cpack((x * x) * cos(y), x * sin(y))); + return (CMPLX(INFINITY, x * (y - y))); + return (CMPLX(INFINITY * cos(y), x * sin(y))); } /* @@ -143,7 +144,7 @@ ccosh(double complex z) * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). */ - return (cpack((x * x) * (y - y), (x + x) * (y - y))); + return (CMPLX((x * x) * (y - y), (x + x) * (y - y))); } double complex @@ -151,5 +152,5 @@ ccos(double complex z) { /* ccos(z) = ccosh(I * z) */ - return (ccosh(cpack(-cimag(z), creal(z)))); + return (ccosh(CMPLX(-cimag(z), creal(z)))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c b/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c index 1de9ad4..e33840a 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_ccoshf.c @@ -25,11 +25,11 @@ */ /* - * Hyperbolic cosine of a complex argument. See s_ccosh.c for details. + * Float version of ccosh(). See s_ccosh.c for details. 
*/ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_ccoshf.c 284423 2015-06-15 20:11:06Z tijl $"); #include <complex.h> #include <math.h> @@ -55,50 +55,47 @@ ccoshf(float complex z) if (ix < 0x7f800000 && iy < 0x7f800000) { if (iy == 0) - return (cpackf(coshf(x), x * y)); - if (ix < 0x41100000) /* small x: normal case */ - return (cpackf(coshf(x) * cosf(y), sinhf(x) * sinf(y))); + return (CMPLXF(coshf(x), x * y)); + if (ix < 0x41100000) /* |x| < 9: normal case */ + return (CMPLXF(coshf(x) * cosf(y), sinhf(x) * sinf(y))); /* |x| >= 9, so cosh(x) ~= exp(|x|) */ if (ix < 0x42b17218) { /* x < 88.7: expf(|x|) won't overflow */ - h = expf(fabsf(x)) * 0.5f; - return (cpackf(h * cosf(y), copysignf(h, x) * sinf(y))); + h = expf(fabsf(x)) * 0.5F; + return (CMPLXF(h * cosf(y), copysignf(h, x) * sinf(y))); } else if (ix < 0x4340b1e7) { /* x < 192.7: scale to avoid overflow */ - z = __ldexp_cexpf(cpackf(fabsf(x), y), -1); - return (cpackf(crealf(z), cimagf(z) * copysignf(1, x))); + z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1); + return (CMPLXF(crealf(z), cimagf(z) * copysignf(1, x))); } else { /* x >= 192.7: the result always overflows */ h = huge * x; - return (cpackf(h * h * cosf(y), h * sinf(y))); + return (CMPLXF(h * h * cosf(y), h * sinf(y))); } } - if (ix == 0 && iy >= 0x7f800000) - return (cpackf(y - y, copysignf(0, x * (y - y)))); + if (ix == 0) /* && iy >= 0x7f800000 */ + return (CMPLXF(y - y, x * copysignf(0, y))); - if (iy == 0 && ix >= 0x7f800000) { - if ((hx & 0x7fffff) == 0) - return (cpackf(x * x, copysignf(0, x) * y)); - return (cpackf(x * x, copysignf(0, (x + x) * y))); - } + if (iy == 0) /* && ix >= 0x7f800000 */ + return (CMPLXF(x * x, copysignf(0, x) * y)); - if (ix < 0x7f800000 && iy >= 0x7f800000) - return (cpackf(y - y, x * (y - y))); + if (ix < 0x7f800000) /* && iy >= 0x7f800000 */ + return (CMPLXF(y - y, x * (y - y))); - if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (ix == 0x7f800000) { if (iy >= 0x7f800000) - return (cpackf(x * x, x * (y - y))); - return (cpackf((x * x) * cosf(y), x * sinf(y))); + return (CMPLXF(INFINITY, x * (y - y))); + return (CMPLXF(INFINITY * cosf(y), x * sinf(y))); } - return (cpackf((x * x) * (y - y), (x + x) * (y - y))); + return (CMPLXF((x * x) * (y - y), (x + x) * (y - y))); } float complex ccosf(float complex z) { - return (ccoshf(cpackf(-cimagf(z), crealf(z)))); + return (ccoshf(CMPLXF(-cimagf(z), crealf(z)))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_cexp.c b/libm/upstream-freebsd/lib/msun/src/s_cexp.c index abe178f..660a68d 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_cexp.c +++ b/libm/upstream-freebsd/lib/msun/src/s_cexp.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_cexp.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -50,22 +50,22 @@ cexp(double complex z) /* cexp(x + I 0) = exp(x) + I 0 */ if ((hy | ly) == 0) - return (cpack(exp(x), y)); + return (CMPLX(exp(x), y)); EXTRACT_WORDS(hx, lx, x); /* cexp(0 + I y) = cos(y) + I sin(y) */ if (((hx & 0x7fffffff) | lx) == 0) - return (cpack(cos(y), sin(y))); + return (CMPLX(cos(y), sin(y))); if (hy >= 0x7ff00000) { if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) { /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ - return (cpack(y - y, y - y)); + return (CMPLX(y - y, y - y)); } else if (hx & 0x80000000) { /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ - return (cpack(0.0, 0.0)); + return (CMPLX(0.0, 0.0)); } else { /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN 
*/ - return (cpack(x, y - y)); + return (CMPLX(x, y - y)); } } @@ -84,6 +84,6 @@ cexp(double complex z) * - x = NaN (spurious inexact exception from y) */ exp_x = exp(x); - return (cpack(exp_x * cos(y), exp_x * sin(y))); + return (CMPLX(exp_x * cos(y), exp_x * sin(y))); } } diff --git a/libm/upstream-freebsd/lib/msun/src/s_cexpf.c b/libm/upstream-freebsd/lib/msun/src/s_cexpf.c index 0e30d08..709ad47 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_cexpf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_cexpf.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_cexpf.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -50,22 +50,22 @@ cexpf(float complex z) /* cexp(x + I 0) = exp(x) + I 0 */ if (hy == 0) - return (cpackf(expf(x), y)); + return (CMPLXF(expf(x), y)); GET_FLOAT_WORD(hx, x); /* cexp(0 + I y) = cos(y) + I sin(y) */ if ((hx & 0x7fffffff) == 0) - return (cpackf(cosf(y), sinf(y))); + return (CMPLXF(cosf(y), sinf(y))); if (hy >= 0x7f800000) { if ((hx & 0x7fffffff) != 0x7f800000) { /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ - return (cpackf(y - y, y - y)); + return (CMPLXF(y - y, y - y)); } else if (hx & 0x80000000) { /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ - return (cpackf(0.0, 0.0)); + return (CMPLXF(0.0, 0.0)); } else { /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ - return (cpackf(x, y - y)); + return (CMPLXF(x, y - y)); } } @@ -84,6 +84,6 @@ cexpf(float complex z) * - x = NaN (spurious inexact exception from y) */ exp_x = expf(x); - return (cpackf(exp_x * cosf(y), exp_x * sinf(y))); + return (CMPLXF(exp_x * cosf(y), exp_x * sinf(y))); } } diff --git a/libm/upstream-freebsd/lib/msun/src/s_conj.c b/libm/upstream-freebsd/lib/msun/src/s_conj.c index 5770c29..61fac63 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_conj.c +++ b/libm/upstream-freebsd/lib/msun/src/s_conj.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD$ + * $FreeBSD: head/lib/msun/src/s_conj.c 275819 2014-12-16 09:21:56Z ed $ */ #include <complex.h> @@ -34,5 +34,5 @@ double complex conj(double complex z) { - return (cpack(creal(z), -cimag(z))); + return (CMPLX(creal(z), -cimag(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_conjf.c b/libm/upstream-freebsd/lib/msun/src/s_conjf.c index b090760..83c9ef0 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_conjf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_conjf.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD$ + * $FreeBSD: head/lib/msun/src/s_conjf.c 275819 2014-12-16 09:21:56Z ed $ */ #include <complex.h> @@ -34,5 +34,5 @@ float complex conjf(float complex z) { - return (cpackf(crealf(z), -cimagf(z))); + return (CMPLXF(crealf(z), -cimagf(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_conjl.c b/libm/upstream-freebsd/lib/msun/src/s_conjl.c index 0e431ef..d9e6a16 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_conjl.c +++ b/libm/upstream-freebsd/lib/msun/src/s_conjl.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD$ + * $FreeBSD: head/lib/msun/src/s_conjl.c 275819 2014-12-16 09:21:56Z ed $ */ #include <complex.h> @@ -34,5 +34,5 @@ long double complex conjl(long double complex z) { - return (cpackl(creall(z), -cimagl(z))); + return (CMPLXL(creall(z), -cimagl(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_cproj.c b/libm/upstream-freebsd/lib/msun/src/s_cproj.c index 8e9404c..ec2266e 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_cproj.c +++ b/libm/upstream-freebsd/lib/msun/src/s_cproj.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_cproj.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -39,7 +39,7 @@ cproj(double complex z) if (!isinf(creal(z)) && !isinf(cimag(z))) return (z); else - return (cpack(INFINITY, copysign(0.0, cimag(z)))); + return (CMPLX(INFINITY, copysign(0.0, cimag(z)))); } #if LDBL_MANT_DIG == 53 diff --git a/libm/upstream-freebsd/lib/msun/src/s_cprojf.c b/libm/upstream-freebsd/lib/msun/src/s_cprojf.c index 68ea77b..63af75f 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_cprojf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_cprojf.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_cprojf.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -39,5 +39,5 @@ cprojf(float complex z) if (!isinf(crealf(z)) && !isinf(cimagf(z))) return (z); else - return (cpackf(INFINITY, copysignf(0.0, cimagf(z)))); + return (CMPLXF(INFINITY, copysignf(0.0, cimagf(z)))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_cprojl.c b/libm/upstream-freebsd/lib/msun/src/s_cprojl.c index 07385bc..8386f81 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_cprojl.c +++ b/libm/upstream-freebsd/lib/msun/src/s_cprojl.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_cprojl.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -39,5 +39,5 @@ cprojl(long double complex z) if (!isinf(creall(z)) && !isinf(cimagl(z))) return (z); else - return (cpackl(INFINITY, copysignl(0.0, cimagl(z)))); + return (CMPLXL(INFINITY, copysignl(0.0, cimagl(z)))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_csinh.c b/libm/upstream-freebsd/lib/msun/src/s_csinh.c index c192f30..cff1402 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_csinh.c +++ b/libm/upstream-freebsd/lib/msun/src/s_csinh.c @@ -32,10 +32,12 @@ * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. + * The sign of the result for some exceptional values is unspecified but + * must satisfy both sinh(conj(z)) == conj(sinh(z)) and sinh(-z) == -sinh(z). */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_csinh.c 284426 2015-06-15 20:16:53Z tijl $"); #include <complex.h> #include <math.h> @@ -62,48 +64,45 @@ csinh(double complex z) /* Handle the nearly-non-exceptional cases where x and y are finite. 
*/ if (ix < 0x7ff00000 && iy < 0x7ff00000) { if ((iy | ly) == 0) - return (cpack(sinh(x), y)); - if (ix < 0x40360000) /* small x: normal case */ - return (cpack(sinh(x) * cos(y), cosh(x) * sin(y))); + return (CMPLX(sinh(x), y)); + if (ix < 0x40360000) /* |x| < 22: normal case */ + return (CMPLX(sinh(x) * cos(y), cosh(x) * sin(y))); /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (ix < 0x40862e42) { /* x < 710: exp(|x|) won't overflow */ h = exp(fabs(x)) * 0.5; - return (cpack(copysign(h, x) * cos(y), h * sin(y))); + return (CMPLX(copysign(h, x) * cos(y), h * sin(y))); } else if (ix < 0x4096bbaa) { /* x < 1455: scale to avoid overflow */ - z = __ldexp_cexp(cpack(fabs(x), y), -1); - return (cpack(creal(z) * copysign(1, x), cimag(z))); + z = __ldexp_cexp(CMPLX(fabs(x), y), -1); + return (CMPLX(creal(z) * copysign(1, x), cimag(z))); } else { /* x >= 1455: the result always overflows */ h = huge * x; - return (cpack(h * cos(y), h * h * sin(y))); + return (CMPLX(h * cos(y), h * h * sin(y))); } } /* - * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. - * The sign of 0 in the result is unspecified. Choice = normally - * the same as dNaN. Raise the invalid floating-point exception. + * sinh(+-0 +- I Inf) = +-0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = same sign + * as the argument. Raise the invalid floating-point exception. * - * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). - * The sign of 0 in the result is unspecified. Choice = normally - * the same as d(NaN). + * sinh(+-0 +- I NaN) = +-0 + I d(NaN). + * The sign of 0 in the result is unspecified. Choice = same sign + * as the argument. */ - if ((ix | lx) == 0 && iy >= 0x7ff00000) - return (cpack(copysign(0, x * (y - y)), y - y)); + if ((ix | lx) == 0) /* && iy >= 0x7ff00000 */ + return (CMPLX(x, y - y)); /* * sinh(+-Inf +- I 0) = +-Inf + I +-0. * * sinh(NaN +- I 0) = d(NaN) + I +-0. */ - if ((iy | ly) == 0 && ix >= 0x7ff00000) { - if (((hx & 0xfffff) | lx) == 0) - return (cpack(x, y)); - return (cpack(x, copysign(0, y))); - } + if ((iy | ly) == 0) /* && ix >= 0x7ff00000 */ + return (CMPLX(x + x, y)); /* * sinh(x +- I Inf) = dNaN + I dNaN. @@ -113,45 +112,45 @@ csinh(double complex z) * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ - if (ix < 0x7ff00000 && iy >= 0x7ff00000) - return (cpack(y - y, x * (y - y))); + if (ix < 0x7ff00000) /* && iy >= 0x7ff00000 */ + return (CMPLX(y - y, y - y)); /* * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). - * The sign of Inf in the result is unspecified. Choice = normally - * the same as d(NaN). + * The sign of Inf in the result is unspecified. Choice = same sign + * as the argument. * - * sinh(+-Inf +- I Inf) = +Inf + I dNaN. - * The sign of Inf in the result is unspecified. Choice = always +. - * Raise the invalid floating-point exception. + * sinh(+-Inf +- I Inf) = +-Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = same sign + * as the argument. Raise the invalid floating-point exception. * * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) */ - if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (ix == 0x7ff00000 && lx == 0) { if (iy >= 0x7ff00000) - return (cpack(x * x, x * (y - y))); - return (cpack(x * cos(y), INFINITY * sin(y))); + return (CMPLX(x, y - y)); + return (CMPLX(x * cos(y), INFINITY * sin(y))); } /* - * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * sinh(NaN1 + I NaN2) = d(NaN1, NaN2) + I d(NaN1, NaN2). * - * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). 
+ * sinh(NaN +- I Inf) = d(NaN, dNaN) + I d(NaN, dNaN). * Optionally raises the invalid floating-point exception. * Choice = raise. * - * sinh(NaN + I y) = d(NaN) + I d(NaN). + * sinh(NaN + I y) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). */ - return (cpack((x * x) * (y - y), (x + x) * (y - y))); + return (CMPLX((x + x) * (y - y), (x * x) * (y - y))); } double complex csin(double complex z) { - /* csin(z) = -I * csinh(I * z) */ - z = csinh(cpack(-cimag(z), creal(z))); - return (cpack(cimag(z), -creal(z))); + /* csin(z) = -I * csinh(I * z) = I * conj(csinh(I * conj(z))). */ + z = csinh(CMPLX(cimag(z), creal(z))); + return (CMPLX(cimag(z), creal(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_csinhf.c b/libm/upstream-freebsd/lib/msun/src/s_csinhf.c index c523125..f050890 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_csinhf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_csinhf.c @@ -25,11 +25,11 @@ */ /* - * Hyperbolic sine of a complex argument z. See s_csinh.c for details. + * Float version of csinh(). See s_csinh.c for details. */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_csinhf.c 284426 2015-06-15 20:16:53Z tijl $"); #include <complex.h> #include <math.h> @@ -55,51 +55,48 @@ csinhf(float complex z) if (ix < 0x7f800000 && iy < 0x7f800000) { if (iy == 0) - return (cpackf(sinhf(x), y)); - if (ix < 0x41100000) /* small x: normal case */ - return (cpackf(sinhf(x) * cosf(y), coshf(x) * sinf(y))); + return (CMPLXF(sinhf(x), y)); + if (ix < 0x41100000) /* |x| < 9: normal case */ + return (CMPLXF(sinhf(x) * cosf(y), coshf(x) * sinf(y))); /* |x| >= 9, so cosh(x) ~= exp(|x|) */ if (ix < 0x42b17218) { /* x < 88.7: expf(|x|) won't overflow */ - h = expf(fabsf(x)) * 0.5f; - return (cpackf(copysignf(h, x) * cosf(y), h * sinf(y))); + h = expf(fabsf(x)) * 0.5F; + return (CMPLXF(copysignf(h, x) * cosf(y), h * sinf(y))); } else if (ix < 0x4340b1e7) { /* x < 192.7: scale to avoid overflow */ - z = __ldexp_cexpf(cpackf(fabsf(x), y), -1); - return (cpackf(crealf(z) * copysignf(1, x), cimagf(z))); + z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1); + return (CMPLXF(crealf(z) * copysignf(1, x), cimagf(z))); } else { /* x >= 192.7: the result always overflows */ h = huge * x; - return (cpackf(h * cosf(y), h * h * sinf(y))); + return (CMPLXF(h * cosf(y), h * h * sinf(y))); } } - if (ix == 0 && iy >= 0x7f800000) - return (cpackf(copysignf(0, x * (y - y)), y - y)); + if (ix == 0) /* && iy >= 0x7f800000 */ + return (CMPLXF(x, y - y)); - if (iy == 0 && ix >= 0x7f800000) { - if ((hx & 0x7fffff) == 0) - return (cpackf(x, y)); - return (cpackf(x, copysignf(0, y))); - } + if (iy == 0) /* && ix >= 0x7f800000 */ + return (CMPLXF(x + x, y)); - if (ix < 0x7f800000 && iy >= 0x7f800000) - return (cpackf(y - y, x * (y - y))); + if (ix < 0x7f800000) /* && iy >= 0x7f800000 */ + return (CMPLXF(y - y, y - y)); - if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (ix == 0x7f800000) { if (iy >= 0x7f800000) - return (cpackf(x * x, x * (y - y))); - return (cpackf(x * cosf(y), INFINITY * sinf(y))); + return (CMPLXF(x, y - y)); + return (CMPLXF(x * cosf(y), INFINITY * sinf(y))); } - return (cpackf((x * x) * (y - y), (x + x) * (y - y))); + return (CMPLXF((x + x) * (y - y), (x * x) * (y - y))); } float complex csinf(float complex z) { - z = csinhf(cpackf(-cimagf(z), crealf(z))); - return (cpackf(cimagf(z), -crealf(z))); + z = csinhf(CMPLXF(cimagf(z), crealf(z))); + return 
(CMPLXF(cimagf(z), crealf(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrt.c b/libm/upstream-freebsd/lib/msun/src/s_csqrt.c index 18a7ae3..c908a2d 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_csqrt.c +++ b/libm/upstream-freebsd/lib/msun/src/s_csqrt.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrt.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <float.h> @@ -58,12 +58,12 @@ csqrt(double complex z) /* Handle special cases. */ if (z == 0) - return (cpack(0, b)); + return (CMPLX(0, b)); if (isinf(b)) - return (cpack(INFINITY, b)); + return (CMPLX(INFINITY, b)); if (isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ - return (cpack(a, t)); /* return NaN + NaN i */ + return (CMPLX(a, t)); /* return NaN + NaN i */ } if (isinf(a)) { /* @@ -73,9 +73,9 @@ csqrt(double complex z) * csqrt(-inf + y i) = 0 + inf i */ if (signbit(a)) - return (cpack(fabs(b - b), copysign(a, b))); + return (CMPLX(fabs(b - b), copysign(a, b))); else - return (cpack(a, copysign(b - b, b))); + return (CMPLX(a, copysign(b - b, b))); } /* * The remaining special case (b is NaN) is handled just fine by @@ -94,10 +94,10 @@ csqrt(double complex z) /* Algorithm 312, CACM vol 10, Oct 1967. */ if (a >= 0) { t = sqrt((a + hypot(a, b)) * 0.5); - result = cpack(t, b / (2 * t)); + result = CMPLX(t, b / (2 * t)); } else { t = sqrt((-a + hypot(a, b)) * 0.5); - result = cpack(fabs(b) / (2 * t), copysign(t, b)); + result = CMPLX(fabs(b) / (2 * t), copysign(t, b)); } /* Rescale. */ diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c b/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c index da7fe18..12a894f 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_csqrtf.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrtf.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <math.h> @@ -49,12 +49,12 @@ csqrtf(float complex z) /* Handle special cases. 
*/ if (z == 0) - return (cpackf(0, b)); + return (CMPLXF(0, b)); if (isinf(b)) - return (cpackf(INFINITY, b)); + return (CMPLXF(INFINITY, b)); if (isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ - return (cpackf(a, t)); /* return NaN + NaN i */ + return (CMPLXF(a, t)); /* return NaN + NaN i */ } if (isinf(a)) { /* @@ -64,9 +64,9 @@ csqrtf(float complex z) * csqrtf(-inf + y i) = 0 + inf i */ if (signbit(a)) - return (cpackf(fabsf(b - b), copysignf(a, b))); + return (CMPLXF(fabsf(b - b), copysignf(a, b))); else - return (cpackf(a, copysignf(b - b, b))); + return (CMPLXF(a, copysignf(b - b, b))); } /* * The remaining special case (b is NaN) is handled just fine by @@ -80,9 +80,9 @@ csqrtf(float complex z) */ if (a >= 0) { t = sqrt((a + hypot(a, b)) * 0.5); - return (cpackf(t, b / (2.0 * t))); + return (CMPLXF(t, b / (2.0 * t))); } else { t = sqrt((-a + hypot(a, b)) * 0.5); - return (cpackf(fabsf(b) / (2.0 * t), copysignf(t, b))); + return (CMPLXF(fabsf(b) / (2.0 * t), copysignf(t, b))); } } diff --git a/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c b/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c index dd18e1e..7bcff59 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c +++ b/libm/upstream-freebsd/lib/msun/src/s_csqrtl.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_csqrtl.c 275819 2014-12-16 09:21:56Z ed $"); #include <complex.h> #include <float.h> @@ -58,12 +58,12 @@ csqrtl(long double complex z) /* Handle special cases. */ if (z == 0) - return (cpackl(0, b)); + return (CMPLXL(0, b)); if (isinf(b)) - return (cpackl(INFINITY, b)); + return (CMPLXL(INFINITY, b)); if (isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ - return (cpackl(a, t)); /* return NaN + NaN i */ + return (CMPLXL(a, t)); /* return NaN + NaN i */ } if (isinf(a)) { /* @@ -73,9 +73,9 @@ csqrtl(long double complex z) * csqrt(-inf + y i) = 0 + inf i */ if (signbit(a)) - return (cpackl(fabsl(b - b), copysignl(a, b))); + return (CMPLXL(fabsl(b - b), copysignl(a, b))); else - return (cpackl(a, copysignl(b - b, b))); + return (CMPLXL(a, copysignl(b - b, b))); } /* * The remaining special case (b is NaN) is handled just fine by @@ -94,10 +94,10 @@ csqrtl(long double complex z) /* Algorithm 312, CACM vol 10, Oct 1967. */ if (a >= 0) { t = sqrtl((a + hypotl(a, b)) * 0.5); - result = cpackl(t, b / (2 * t)); + result = CMPLXL(t, b / (2 * t)); } else { t = sqrtl((-a + hypotl(a, b)) * 0.5); - result = cpackl(fabsl(b) / (2 * t), copysignl(t, b)); + result = CMPLXL(fabsl(b) / (2 * t), copysignl(t, b)); } /* Rescale. */ diff --git a/libm/upstream-freebsd/lib/msun/src/s_ctanh.c b/libm/upstream-freebsd/lib/msun/src/s_ctanh.c index d427e28..e5973c3 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_ctanh.c +++ b/libm/upstream-freebsd/lib/msun/src/s_ctanh.c @@ -25,7 +25,7 @@ */ /* - * Hyperbolic tangent of a complex argument z = x + i y. + * Hyperbolic tangent of a complex argument z = x + I y. 
* * The algorithm is from: * @@ -44,15 +44,15 @@ * * tanh(z) = sinh(z) / cosh(z) * - * sinh(x) cos(y) + i cosh(x) sin(y) + * sinh(x) cos(y) + I cosh(x) sin(y) * = --------------------------------- - * cosh(x) cos(y) + i sinh(x) sin(y) + * cosh(x) cos(y) + I sinh(x) sin(y) * - * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * cosh(x) sinh(x) / cos^2(y) + I tan(y) * = ------------------------------------- * 1 + sinh^2(x) / cos^2(y) * - * beta rho s + i t + * beta rho s + I t * = ---------------- * 1 + beta s^2 * @@ -64,7 +64,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_ctanh.c 284427 2015-06-15 20:40:44Z tijl $"); #include <complex.h> #include <math.h> @@ -85,16 +85,16 @@ ctanh(double complex z) ix = hx & 0x7fffffff; /* - * ctanh(NaN + i 0) = NaN + i 0 + * ctanh(NaN +- I 0) = d(NaN) +- I 0 * - * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * ctanh(NaN + I y) = d(NaN,y) + I d(NaN,y) for y != 0 * * The imaginary part has the sign of x*sin(2*y), but there's no * special effort to get this right. * - * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * ctanh(+-Inf +- I Inf) = +-1 +- I 0 * - * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * ctanh(+-Inf + I y) = +-1 + I 0 sin(2y) for y finite * * The imaginary part of the sign is unspecified. This special * case is only needed to avoid a spurious invalid exception when @@ -102,26 +102,27 @@ ctanh(double complex z) */ if (ix >= 0x7ff00000) { if ((ix & 0xfffff) | lx) /* x is NaN */ - return (cpack(x, (y == 0 ? y : x * y))); + return (CMPLX((x + 0) * (y + 0), + y == 0 ? y : (x + 0) * (y + 0))); SET_HIGH_WORD(x, hx - 0x40000000); /* x = copysign(1, x) */ - return (cpack(x, copysign(0, isinf(y) ? y : sin(y) * cos(y)))); + return (CMPLX(x, copysign(0, isinf(y) ? y : sin(y) * cos(y)))); } /* - * ctanh(x + i NAN) = NaN + i NaN - * ctanh(x +- i Inf) = NaN + i NaN + * ctanh(x + I NaN) = d(NaN) + I d(NaN) + * ctanh(x +- I Inf) = dNaN + I dNaN */ if (!isfinite(y)) - return (cpack(y - y, y - y)); + return (CMPLX(y - y, y - y)); /* - * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * ctanh(+-huge +- I y) ~= +-1 +- I 2sin(2y)/exp(2x), using the * approximation sinh^2(huge) ~= exp(2*huge) / 4. * We use a modified formula to avoid spurious overflow. 
*/ - if (ix >= 0x40360000) { /* x >= 22 */ + if (ix >= 0x40360000) { /* |x| >= 22 */ double exp_mx = exp(-fabs(x)); - return (cpack(copysign(1, x), + return (CMPLX(copysign(1, x), 4 * sin(y) * cos(y) * exp_mx * exp_mx)); } @@ -131,14 +132,14 @@ ctanh(double complex z) s = sinh(x); rho = sqrt(1 + s * s); /* = cosh(x) */ denom = 1 + beta * s * s; - return (cpack((beta * rho * s) / denom, t / denom)); + return (CMPLX((beta * rho * s) / denom, t / denom)); } double complex ctan(double complex z) { - /* ctan(z) = -I * ctanh(I * z) */ - z = ctanh(cpack(-cimag(z), creal(z))); - return (cpack(cimag(z), -creal(z))); + /* ctan(z) = -I * ctanh(I * z) = I * conj(ctanh(I * conj(z))) */ + z = ctanh(CMPLX(cimag(z), creal(z))); + return (CMPLX(cimag(z), creal(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c b/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c index 4be28d8..e9826c0 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c +++ b/libm/upstream-freebsd/lib/msun/src/s_ctanhf.c @@ -29,7 +29,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_ctanhf.c 284428 2015-06-15 20:47:26Z tijl $"); #include <complex.h> #include <math.h> @@ -51,18 +51,19 @@ ctanhf(float complex z) if (ix >= 0x7f800000) { if (ix & 0x7fffff) - return (cpackf(x, (y == 0 ? y : x * y))); + return (CMPLXF((x + 0) * (y + 0), + y == 0 ? y : (x + 0) * (y + 0))); SET_FLOAT_WORD(x, hx - 0x40000000); - return (cpackf(x, + return (CMPLXF(x, copysignf(0, isinf(y) ? y : sinf(y) * cosf(y)))); } if (!isfinite(y)) - return (cpackf(y - y, y - y)); + return (CMPLXF(y - y, y - y)); - if (ix >= 0x41300000) { /* x >= 11 */ + if (ix >= 0x41300000) { /* |x| >= 11 */ float exp_mx = expf(-fabsf(x)); - return (cpackf(copysignf(1, x), + return (CMPLXF(copysignf(1, x), 4 * sinf(y) * cosf(y) * exp_mx * exp_mx)); } @@ -71,14 +72,14 @@ ctanhf(float complex z) s = sinhf(x); rho = sqrtf(1 + s * s); denom = 1 + beta * s * s; - return (cpackf((beta * rho * s) / denom, t / denom)); + return (CMPLXF((beta * rho * s) / denom, t / denom)); } float complex ctanf(float complex z) { - z = ctanhf(cpackf(-cimagf(z), crealf(z))); - return (cpackf(cimagf(z), -crealf(z))); + z = ctanhf(CMPLXF(cimagf(z), crealf(z))); + return (CMPLXF(cimagf(z), crealf(z))); } diff --git a/libm/upstream-freebsd/lib/msun/src/s_exp2.c b/libm/upstream-freebsd/lib/msun/src/s_exp2.c index fde11c2..dbef729 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_exp2.c +++ b/libm/upstream-freebsd/lib/msun/src/s_exp2.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_exp2.c 286515 2015-08-09 10:00:13Z dim $"); #include <float.h> @@ -376,14 +376,14 @@ exp2(double x) /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ t = tbl[i0]; /* exp2t[i0] */ z -= tbl[i0 + 1]; /* eps[i0] */ - if (k >= -1021 << 20) + if (k >= -(1021 << 20)) INSERT_WORDS(twopk, 0x3ff00000 + k, 0); else INSERT_WORDS(twopkp1000, 0x3ff00000 + k + (1000 << 20), 0); r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); /* Scale by 2**(k>>20). 
*/ - if(k >= -1021 << 20) { + if(k >= -(1021 << 20)) { if (k == 1024 << 20) return (r * 2.0 * 0x1p1023); return (r * twopk); diff --git a/libm/upstream-freebsd/lib/msun/src/s_fabs.c b/libm/upstream-freebsd/lib/msun/src/s_fabs.c deleted file mode 100644 index 15529e5..0000000 --- a/libm/upstream-freebsd/lib/msun/src/s_fabs.c +++ /dev/null @@ -1,31 +0,0 @@ -/* @(#)s_fabs.c 5.1 93/09/24 */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#ifndef lint -static char rcsid[] = "$FreeBSD$"; -#endif - -/* - * fabs(x) returns the absolute value of x. - */ - -#include "math.h" -#include "math_private.h" - -double -fabs(double x) -{ - u_int32_t high; - GET_HIGH_WORD(high,x); - SET_HIGH_WORD(x,high&0x7fffffff); - return x; -} diff --git a/libm/upstream-freebsd/lib/msun/src/s_fabsf.c b/libm/upstream-freebsd/lib/msun/src/s_fabsf.c deleted file mode 100644 index e9383d0..0000000 --- a/libm/upstream-freebsd/lib/msun/src/s_fabsf.c +++ /dev/null @@ -1,33 +0,0 @@ -/* s_fabsf.c -- float version of s_fabs.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * fabsf(x) returns the absolute value of x. - */ - -#include "math.h" -#include "math_private.h" - -float -fabsf(float x) -{ - u_int32_t ix; - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(x,ix&0x7fffffff); - return x; -} diff --git a/libm/upstream-freebsd/lib/msun/src/s_scalbln.c b/libm/upstream-freebsd/lib/msun/src/s_scalbln.c index d609d4e..8e61377 100644 --- a/libm/upstream-freebsd/lib/msun/src/s_scalbln.c +++ b/libm/upstream-freebsd/lib/msun/src/s_scalbln.c @@ -25,52 +25,30 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/lib/msun/src/s_scalbln.c 278339 2015-02-07 00:38:18Z kargl $"); -#include <limits.h> #include <math.h> +#define NMAX 65536 +#define NMIN -65536 + double -scalbln (double x, long n) +scalbln(double x, long n) { - int in; - in = (int)n; - if (in != n) { - if (n > 0) - in = INT_MAX; - else - in = INT_MIN; - } - return (scalbn(x, in)); + return (scalbn(x, (n > NMAX) ? NMAX : (n < NMIN) ? NMIN : (int)n)); } float -scalblnf (float x, long n) +scalblnf(float x, long n) { - int in; - in = (int)n; - if (in != n) { - if (n > 0) - in = INT_MAX; - else - in = INT_MIN; - } - return (scalbnf(x, in)); + return (scalbnf(x, (n > NMAX) ? NMAX : (n < NMIN) ? NMIN : (int)n)); } long double -scalblnl (long double x, long n) +scalblnl(long double x, long n) { - int in; - in = (int)n; - if (in != n) { - if (n > 0) - in = INT_MAX; - else - in = INT_MIN; - } - return (scalbnl(x, (int)n)); + return (scalbnl(x, (n > NMAX) ? NMAX : (n < NMIN) ? 
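The simplified scalbln()/scalblnf()/scalblnl() above clamp the long exponent to [-65536, 65536] before handing it to the int-based scalbn() family. That is safe because any |n| that large is already far outside the exponent range of every supported floating-point format, so the clamped and unclamped calls overflow or underflow identically. A minimal stand-alone sketch of the observable behaviour (standard <math.h> only):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      /* For double, results stop changing once |n| exceeds roughly 2098
         (overflow/underflow saturates), so clamping at 65536 is harmless. */
      printf("%g\n", scalbln(1.5, 10L));       /* 1536 = 1.5 * 2^10           */
      printf("%g\n", scalbln(1.0, 100000L));   /* inf: overflow, clamp or not */
      printf("%g\n", scalbln(1.0, -100000L));  /* 0: underflow, clamp or not  */
      return 0;
    }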
NMIN : (int)n)); } diff --git a/linker/linker.cpp b/linker/linker.cpp index d3ac1d0..bc40cf1 100644 --- a/linker/linker.cpp +++ b/linker/linker.cpp @@ -890,6 +890,67 @@ typedef linked_list_t<soinfo> SoinfoLinkedList; typedef linked_list_t<const char> StringLinkedList; typedef linked_list_t<LoadTask> LoadTaskList; +static soinfo* find_library(const char* name, int rtld_flags, const android_dlextinfo* extinfo); + +// g_ld_all_shim_libs maintains the references to memory as it used +// in the soinfo structures and in the g_active_shim_libs list. + +static std::vector<std::string> g_ld_all_shim_libs; + +// g_active_shim_libs are all shim libs that are still eligible +// to be loaded. We must remove a shim lib from the list before +// we load the library to avoid recursive loops (load shim libA +// for libB where libA also links against libB). + +static linked_list_t<const std::string> g_active_shim_libs; + +static void reset_g_active_shim_libs(void) { + g_active_shim_libs.clear(); + for (const auto& pair : g_ld_all_shim_libs) { + g_active_shim_libs.push_back(&pair); + } +} + +static void parse_LD_SHIM_LIBS(const char* path) { + parse_path(path, " :", &g_ld_all_shim_libs); + reset_g_active_shim_libs(); +} + +static bool shim_lib_matches(const char *shim_lib, const char *realpath) { + const char *sep = strchr(shim_lib, '|'); + return sep != nullptr && strncmp(realpath, shim_lib, sep - shim_lib) == 0; +} + +template<typename F> +static void shim_libs_for_each(const char *const path, F action) { + if (path == nullptr) return; + INFO("Finding shim libs for \"%s\"\n", path); + std::vector<const std::string *> matched; + + g_active_shim_libs.for_each([&](const std::string *a_pair) { + const char *pair = a_pair->c_str(); + if (shim_lib_matches(pair, path)) { + matched.push_back(a_pair); + } + }); + + g_active_shim_libs.remove_if([&](const std::string *a_pair) { + const char *pair = a_pair->c_str(); + return shim_lib_matches(pair, path); + }); + + for (const auto& one_pair : matched) { + const char* const pair = one_pair->c_str(); + const char* sep = strchr(pair, '|'); + soinfo *child = find_library(sep+1, RTLD_GLOBAL, nullptr); + if (child) { + INFO("Using shim lib \"%s\"\n", sep+1); + action(child); + } else { + PRINT("Shim lib \"%s\" can not be loaded, ignoring.", sep+1); + } + } +} // This function walks down the tree of soinfo dependencies // in breadth-first order and @@ -899,7 +960,7 @@ typedef linked_list_t<LoadTask> LoadTaskList; // walk_dependencies_tree returns false if walk was terminated // by the action and true otherwise. 
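The linker additions above introduce an LD_SHIM_LIBS list of "target|shim" pairs, split on spaces or colons by parse_path(); when a loaded library's realpath matches the text before the '|', the library named after the '|' is loaded with RTLD_GLOBAL and attached as an extra dependency. A hypothetical invocation (library names made up for illustration):

    LD_SHIM_LIBS="/system/lib/libfoo.so|libfoo_shim.so:/system/lib/libbar.so|libbar_compat.so" ./some_binary

Each matched pair is removed from g_active_shim_libs before its shim is loaded, which is what prevents an infinite loop when the shim itself links against the library it shims. Like LD_LIBRARY_PATH and LD_PRELOAD, the variable is only consulted for processes that are not AT_SECURE (see the getenv() block further below).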
template<typename F> -static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_size, F action) { +static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_size, bool do_shims, F action) { SoinfoLinkedList visit_list; SoinfoLinkedList visited; @@ -919,6 +980,13 @@ static bool walk_dependencies_tree(soinfo* root_soinfos[], size_t root_soinfos_s visited.push_back(si); + if (do_shims) { + shim_libs_for_each(si->get_realpath(), [&](soinfo* child) { + si->add_child(child); + visit_list.push_back(child); + }); + } + si->get_children().for_each([&](soinfo* child) { visit_list.push_back(child); }); @@ -933,7 +1001,7 @@ static const ElfW(Sym)* dlsym_handle_lookup(soinfo* root, soinfo* skip_until, const ElfW(Sym)* result = nullptr; bool skip_lookup = skip_until != nullptr; - walk_dependencies_tree(&root, 1, [&](soinfo* current_soinfo) { + walk_dependencies_tree(&root, 1, false, [&](soinfo* current_soinfo) { if (skip_lookup) { skip_lookup = current_soinfo != skip_until; return true; @@ -1513,6 +1581,7 @@ static bool find_libraries(soinfo* start_with, const char* const library_names[] walk_dependencies_tree( start_with == nullptr ? soinfos : &start_with, start_with == nullptr ? soinfos_count : 1, + true, [&] (soinfo* si) { local_group.push_back(si); return true; @@ -1692,6 +1761,7 @@ soinfo* do_dlopen(const char* name, int flags, const android_dlextinfo* extinfo) } ProtectedDataGuard guard; + reset_g_active_shim_libs(); soinfo* si = find_library(name, flags, extinfo); if (si != nullptr) { si->call_constructors(); @@ -2943,12 +3013,14 @@ bool soinfo::link_image(const soinfo_list_t& global_group, const soinfo_list_t& if (has_text_relocations) { // Fail if app is targeting sdk version > 22 // TODO (dimitry): remove != __ANDROID_API__ check once http://b/20020312 is fixed +#if !defined(__i386__) // ffmpeg says that they require text relocations on x86 if (get_application_target_sdk_version() != __ANDROID_API__ && get_application_target_sdk_version() > 22) { PRINT("%s: has text relocations", get_realpath()); DL_ERR("%s: has text relocations", get_realpath()); return false; } +#endif // Make segments writable to allow text relocations to work properly. We will later call // phdr_table_protect_segments() after all of them are applied and all constructors are run. DL_WARN("%s has text relocations. This is wasting memory and prevents " @@ -3164,9 +3236,11 @@ static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW( // doesn't cost us anything. const char* ldpath_env = nullptr; const char* ldpreload_env = nullptr; + const char* ldshim_libs_env = nullptr; if (!getauxval(AT_SECURE)) { ldpath_env = getenv("LD_LIBRARY_PATH"); ldpreload_env = getenv("LD_PRELOAD"); + ldshim_libs_env = getenv("LD_SHIM_LIBS"); } INFO("[ android linker & debugger ]"); @@ -3220,6 +3294,7 @@ static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW( // Use LD_LIBRARY_PATH and LD_PRELOAD (but only if we aren't setuid/setgid). 
parse_LD_LIBRARY_PATH(ldpath_env); parse_LD_PRELOAD(ldpreload_env); + parse_LD_SHIM_LIBS(ldshim_libs_env); somain = si; diff --git a/tests/Android.mk b/tests/Android.mk index dc2e410..b994cc3 100644 --- a/tests/Android.mk +++ b/tests/Android.mk @@ -64,6 +64,7 @@ libBionicStandardTests_src_files := \ getcwd_test.cpp \ inttypes_test.cpp \ libc_logging_test.cpp \ + libgen_basename_test.cpp \ libgen_test.cpp \ locale_test.cpp \ malloc_test.cpp \ diff --git a/tests/buffer_tests.cpp b/tests/buffer_tests.cpp index 4967382..a2b330e 100644 --- a/tests/buffer_tests.cpp +++ b/tests/buffer_tests.cpp @@ -381,15 +381,19 @@ void RunSrcDstBufferOverreadTest(void (*test_func)(uint8_t*, uint8_t*, size_t)) // Make the second page unreadable and unwritable. ASSERT_TRUE(mprotect(&memory[pagesize], pagesize, PROT_NONE) == 0); - uint8_t* dst = new uint8_t[pagesize]; - for (size_t i = 0; i < pagesize; i++) { - uint8_t* src = &memory[pagesize-i]; - - test_func(src, dst, i); + uint8_t* dst_buffer = new uint8_t[2*pagesize]; + // Change the dst alignment as we change the source. + for (size_t i = 0; i < 16; i++) { + uint8_t* dst = &dst_buffer[i]; + for (size_t j = 0; j < pagesize; j++) { + uint8_t* src = &memory[pagesize-j]; + + test_func(src, dst, j); + } } ASSERT_TRUE(mprotect(&memory[pagesize], pagesize, PROT_READ | PROT_WRITE) == 0); free(memory); - delete dst; + delete dst_buffer; } void RunCmpBufferOverreadTest( diff --git a/tests/libgen_basename_test.cpp b/tests/libgen_basename_test.cpp new file mode 100644 index 0000000..55939d1 --- /dev/null +++ b/tests/libgen_basename_test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GNU_SOURCE + #define _GNU_SOURCE 1 +#endif + +#include <string.h> + +#if defined(basename) + #error basename should not be defined at this point +#endif + +static const char* gnu_basename(const char* in) { + return basename(in); +} + +#include <libgen.h> + +#if !defined(basename) + #error basename should be defined at this point +#endif + +static char* posix_basename(char* in) { + return basename(in); +} + +#include <errno.h> +#include <gtest/gtest.h> + +static void __TestGnuBasename(const char* in, const char* expected_out, int line) { + const char* out = gnu_basename(in); + ASSERT_STREQ(expected_out, out) << "(" << line << "): " << in << std::endl; + ASSERT_EQ(0, errno) << "(" << line << "): " << in << std::endl; +} + +static void __TestPosixBasename(const char* in, const char* expected_out, int line) { + char* writable_in = (in != NULL) ? 
strdup(in) : NULL; + errno = 0; + const char* out = posix_basename(&writable_in[0]); + ASSERT_STREQ(expected_out, out) << "(" << line << "): " << in << std::endl; + ASSERT_EQ(0, errno) << "(" << line << "): " << in << std::endl; + free(writable_in); +} + +#define TestGnuBasename(in, expected) __TestGnuBasename(in, expected, __LINE__) +#define TestPosixBasename(in, expected) __TestPosixBasename(in, expected, __LINE__) + +TEST(libgen_basename, gnu_basename) { + // GNU's basename doesn't accept NULL + // TestGnuBasename(NULL, "."); + TestGnuBasename("", ""); + TestGnuBasename("/usr/lib", "lib"); + TestGnuBasename("/system/bin/sh/", ""); + TestGnuBasename("/usr/", ""); + TestGnuBasename("usr", "usr"); + TestGnuBasename("/", ""); + TestGnuBasename(".", "."); + TestGnuBasename("..", ".."); + TestGnuBasename("///", ""); + TestGnuBasename("//usr//lib//", ""); +} + +TEST(libgen_basename, posix_basename) { + TestPosixBasename(NULL, "."); + TestPosixBasename("", "."); + TestPosixBasename("/usr/lib", "lib"); + TestPosixBasename("/system/bin/sh/", "sh"); + TestPosixBasename("/usr/", "usr"); + TestPosixBasename("usr", "usr"); + TestPosixBasename("/", "/"); + TestPosixBasename(".", "."); + TestPosixBasename("..", ".."); + TestPosixBasename("///", "/"); + TestPosixBasename("//usr//lib//", "lib"); +} diff --git a/tests/libgen_test.cpp b/tests/libgen_test.cpp index e9a5d5c..8a37a3f 100644 --- a/tests/libgen_test.cpp +++ b/tests/libgen_test.cpp @@ -19,15 +19,6 @@ #include <errno.h> #include <gtest/gtest.h> -static void TestBasename(const char* in, const char* expected_out) { - char* writable_in = (in != NULL) ? strdup(in) : NULL; - errno = 0; - const char* out = basename(&writable_in[0]); - ASSERT_STREQ(expected_out, out) << in; - ASSERT_EQ(0, errno) << in; - free(writable_in); -} - static void TestDirname(const char* in, const char* expected_out) { char* writable_in = (in != NULL) ? strdup(in) : NULL; errno = 0; @@ -37,21 +28,6 @@ static void TestDirname(const char* in, const char* expected_out) { free(writable_in); } -// Do not use basename as the test name, it's defined to another value in glibc -// so leads to a differently named test on host versus target architectures. 
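The new libgen_basename_test above uses include order to get both flavours of basename() into one translation unit: the GNU version is taken from <string.h> under _GNU_SOURCE before <libgen.h> is included, and the POSIX version afterwards. The behavioural difference encoded in the expected values can be seen in isolation; a minimal sketch using only the GNU flavour (glibc-style declaration assumed):

    #define _GNU_SOURCE
    #include <string.h>   /* GNU basename(); <libgen.h> deliberately not included */
    #include <stdio.h>

    int main(void) {
      /* GNU basename() never strips trailing slashes and never modifies its
         argument, so a path ending in '/' yields the empty string. */
      printf("\"%s\"\n", basename("/usr/lib"));  /* "lib" */
      printf("\"%s\"\n", basename("/usr/"));     /* ""    */
      /* The POSIX basename() from <libgen.h>, by contrast, returns "usr" for
         "/usr/", accepts NULL (yielding "."), and may modify its input. */
      return 0;
    }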
-TEST(libgen, posix_basename) { - TestBasename(NULL, "."); - TestBasename("", "."); - TestBasename("/usr/lib", "lib"); - TestBasename("/usr/", "usr"); - TestBasename("usr", "usr"); - TestBasename("/", "/"); - TestBasename(".", "."); - TestBasename("..", ".."); - TestBasename("///", "/"); - TestBasename("//usr//lib//", "lib"); -} - TEST(libgen, dirname) { TestDirname(NULL, "."); TestDirname("", "."); diff --git a/tests/regex_test.cpp b/tests/regex_test.cpp index d026221..4a4409e 100644 --- a/tests/regex_test.cpp +++ b/tests/regex_test.cpp @@ -36,3 +36,13 @@ TEST(regex, smoke) { regfree(&re); } + +TEST(regex, match_offsets) { + regex_t re; + regmatch_t matches[1]; + ASSERT_EQ(0, regcomp(&re, "b", 0)); + ASSERT_EQ(0, regexec(&re, "abc", 1, matches, 0)); + ASSERT_EQ(1, matches[0].rm_so); + ASSERT_EQ(2, matches[0].rm_eo); + regfree(&re); +} diff --git a/tests/string_test.cpp b/tests/string_test.cpp index 1d63c76..3d97d81 100644 --- a/tests/string_test.cpp +++ b/tests/string_test.cpp @@ -1166,7 +1166,7 @@ static size_t LargeSetIncrement(size_t len) { return 1; } -#define STRCAT_DST_LEN 128 +#define STRCAT_DST_LEN 64 static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) { if (len >= 1) { @@ -1181,7 +1181,7 @@ static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) { int value2 = 32 + (value + 2) % 96; memset(cmp_buf, value2, sizeof(cmp_buf)); - for (size_t i = 1; i <= STRCAT_DST_LEN; i++) { + for (size_t i = 1; i <= STRCAT_DST_LEN;) { memset(dst, value2, i-1); memset(dst+i-1, 0, len-i); src[len-i] = '\0'; @@ -1189,6 +1189,13 @@ static void DoStrcatTest(uint8_t* src, uint8_t* dst, size_t len) { reinterpret_cast<char*>(src)))); ASSERT_TRUE(memcmp(dst, cmp_buf, i-1) == 0); ASSERT_TRUE(memcmp(src, dst+i-1, len-i+1) == 0); + // This is an expensive loop, so don't loop through every value, + // get to a certain size and then start doubling. + if (i < 16) { + i++; + } else { + i <<= 1; + } } } else { dst[0] = '\0'; @@ -1221,7 +1228,7 @@ static void DoStrlcatTest(uint8_t* src, uint8_t* dst, size_t len) { int value2 = 32 + (value + 2) % 96; memset(cmp_buf, value2, sizeof(cmp_buf)); - for (size_t i = 1; i <= STRCAT_DST_LEN; i++) { + for (size_t i = 1; i <= STRCAT_DST_LEN;) { memset(dst, value2, i-1); memset(dst+i-1, 0, len-i); src[len-i] = '\0'; @@ -1229,6 +1236,13 @@ static void DoStrlcatTest(uint8_t* src, uint8_t* dst, size_t len) { reinterpret_cast<char*>(src), len)); ASSERT_TRUE(memcmp(dst, cmp_buf, i-1) == 0); ASSERT_TRUE(memcmp(src, dst+i-1, len-i+1) == 0); + // This is an expensive loop, so don't loop through every value, + // get to a certain size and then start doubling. 
+ if (i < 16) { + i++; + } else { + i <<= 1; + } } } else { dst[0] = '\0'; diff --git a/tests/sys_resource_test.cpp b/tests/sys_resource_test.cpp index 8cefc65..0b6b6ef 100644 --- a/tests/sys_resource_test.cpp +++ b/tests/sys_resource_test.cpp @@ -33,7 +33,8 @@ class SysResourceTest : public ::testing::Test { virtual void SetUp() { ASSERT_EQ(0, getrlimit(RLIMIT_CORE, &l32_)); ASSERT_EQ(0, getrlimit64(RLIMIT_CORE, &l64_)); - ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, NULL, &pr_l64_)); + ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, nullptr, &pr_l32_)); + ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, nullptr, &pr_l64_)); } void CheckResourceLimits(); @@ -41,21 +42,28 @@ class SysResourceTest : public ::testing::Test { protected: rlimit l32_; rlimit64 l64_; + rlimit pr_l32_; rlimit64 pr_l64_; }; void SysResourceTest::CheckResourceLimits() { ASSERT_EQ(0, getrlimit(RLIMIT_CORE, &l32_)); ASSERT_EQ(0, getrlimit64(RLIMIT_CORE, &l64_)); - ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, NULL, &pr_l64_)); + ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, nullptr, &pr_l32_)); + ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, nullptr, &pr_l64_)); + + ASSERT_EQ(l32_.rlim_cur, pr_l32_.rlim_cur); ASSERT_EQ(l64_.rlim_cur, pr_l64_.rlim_cur); + if (l64_.rlim_cur == RLIM64_INFINITY) { ASSERT_EQ(RLIM_INFINITY, l32_.rlim_cur); } else { ASSERT_EQ(l64_.rlim_cur, l32_.rlim_cur); } + ASSERT_EQ(l32_.rlim_max, pr_l32_.rlim_max); ASSERT_EQ(l64_.rlim_max, pr_l64_.rlim_max); + if (l64_.rlim_max == RLIM64_INFINITY) { ASSERT_EQ(RLIM_INFINITY, l32_.rlim_max); } else { @@ -88,13 +96,16 @@ TEST_F(SysResourceTest, setrlimit64) { ASSERT_EQ(456U, l64_.rlim_cur); } +TEST_F(SysResourceTest, prlimit) { + pr_l32_.rlim_cur = pr_l32_.rlim_max; + ASSERT_EQ(0, prlimit(0, RLIMIT_CORE, &pr_l32_, nullptr)); + CheckResourceLimits(); + ASSERT_EQ(pr_l32_.rlim_max, pr_l32_.rlim_cur); +} + TEST_F(SysResourceTest, prlimit64) { pr_l64_.rlim_cur = pr_l64_.rlim_max; - ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, &pr_l64_, NULL)); + ASSERT_EQ(0, prlimit64(0, RLIMIT_CORE, &pr_l64_, nullptr)); CheckResourceLimits(); ASSERT_EQ(pr_l64_.rlim_max, pr_l64_.rlim_cur); } - -TEST_F(SysResourceTest, prlimit) { - // prlimit is prlimit64 on LP64 and unimplemented on 32-bit. So we only test prlimit64. -} |
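The sys_resource_test changes above replace the old placeholder (which skipped prlimit() entirely) with a real test of the plain prlimit()/struct rlimit variant alongside prlimit64(). A minimal stand-alone sketch of the call the new test exercises (Linux-specific; glibc needs _GNU_SOURCE):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/resource.h>

    int main(void) {
      struct rlimit lim;
      /* pid 0 means the calling process; a NULL new limit just reads. */
      if (prlimit(0, RLIMIT_CORE, NULL, &lim) != 0) { perror("prlimit"); return 1; }
      printf("RLIMIT_CORE: cur=%llu max=%llu\n",
             (unsigned long long) lim.rlim_cur, (unsigned long long) lim.rlim_max);

      /* Raise the soft limit to the hard limit, as the new TEST_F does. */
      lim.rlim_cur = lim.rlim_max;
      if (prlimit(0, RLIMIT_CORE, &lim, NULL) != 0) { perror("prlimit"); return 1; }
      return 0;
    }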