diff options
Diffstat (limited to 'libc/arch-arm')
39 files changed, 2010 insertions, 955 deletions
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk index d72a160..c2b80c5 100644 --- a/libc/arch-arm/arm.mk +++ b/libc/arch-arm/arm.mk @@ -20,7 +20,6 @@ libc_freebsd_src_files_arm += \ upstream-freebsd/lib/libc/string/wmemmove.c \ libc_openbsd_src_files_arm += \ - upstream-openbsd/lib/libc/string/memchr.c \ upstream-openbsd/lib/libc/string/memrchr.c \ upstream-openbsd/lib/libc/string/stpncpy.c \ upstream-openbsd/lib/libc/string/strlcat.c \ @@ -52,7 +51,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),) endif cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk ifeq ($(wildcard $(cpu_variant_mk)),) -$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") +$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, scorpion, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") endif include $(cpu_variant_mk) libc_common_additional_dependencies += $(cpu_variant_mk) diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S index a2e9c22..3692f04 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,191 +26,7 @@ * SUCH DAMAGE. */ -#include <private/bionic_asm.h> -#include <private/libc_events.h> +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .syntax unified - - .thumb - .thumb_func - -// Get the length of src string, then get the source of the dst string. -// Check that the two lengths together don't exceed the threshold, then -// do a memcpy of the data. -ENTRY(__strcat_chk) - pld [r0, #0] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - push {r4, r5} - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - - mov lr, r2 - - // Save the dst register to r5 - mov r5, r0 - - // Zero out r4 - eor r4, r4, r4 - - // r1 contains the address of the string to count. -.L_strlen_start: - mov r0, r1 - ands r3, r1, #7 - beq .L_mainloop - - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq .L_align_to_32 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_32: - bcc .L_align_to_64 - ands ip, r3, #2 - beq .L_align_to_64 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_64: - tst r3, #4 - beq .L_mainloop - ldr r3, [r1], #4 - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - - .p2align 2 -.L_mainloop: - ldrd r2, r3, [r1], #8 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .L_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - b .L_mainloop - -.L_update_count_and_finish: - sub r3, r1, r0 - sub r3, r3, #1 - b .L_finish - -.L_zero_in_first_register: - sub r3, r1, r0 - lsls r2, ip, #17 - bne .L_sub8_and_finish - bcs .L_sub7_and_finish - lsls ip, ip, #1 - bne .L_sub6_and_finish - - sub r3, r3, #5 - b .L_finish - -.L_sub8_and_finish: - sub r3, r3, #8 - b .L_finish - -.L_sub7_and_finish: - sub r3, r3, #7 - b .L_finish - -.L_sub6_and_finish: - sub r3, r3, #6 - b .L_finish - -.L_zero_in_second_register: - sub r3, r1, r0 - lsls r2, ip, #17 - bne .L_sub4_and_finish - bcs .L_sub3_and_finish - lsls ip, ip, #1 - bne .L_sub2_and_finish - - sub r3, r3, #1 - b .L_finish - -.L_sub4_and_finish: - sub r3, r3, #4 - b .L_finish - -.L_sub3_and_finish: - sub r3, r3, #3 - b .L_finish - -.L_sub2_and_finish: - sub r3, r3, #2 - -.L_finish: - cmp r4, #0 - bne .L_strlen_done - - // Time to get the dst string length. - mov r1, r5 - - // Save the original source address to r5. - mov r5, r0 - - // Save the current length (adding 1 for the terminator). - add r4, r3, #1 - b .L_strlen_start - - // r0 holds the pointer to the dst string. - // r3 holds the dst string length. - // r4 holds the src string length + 1. -.L_strlen_done: - add r2, r3, r4 - cmp r2, lr - bhi __strcat_chk_failed - - // Set up the registers for the memcpy code. - mov r1, r5 - pld [r1, #64] - mov r2, r4 - add r0, r0, r3 - pop {r4, r5} -END(__strcat_chk) - -#define MEMCPY_BASE __strcat_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned - -#include "memcpy_base.S" - -ENTRY_PRIVATE(__strcat_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+4) -END(__strcat_chk_failed) - - .data -error_string: - .string "strcat: prevented write past end of buffer" +#include "__strcat_chk_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S new file mode 100644 index 0000000..de66967 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .syntax unified + + .thumb + .thumb_func + +// Get the length of src string, then get the source of the dst string. +// Check that the two lengths together don't exceed the threshold, then +// do a memcpy of the data. +ENTRY(__strcat_chk) + pld [r0, #0] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + push {r4, r5} + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + + mov lr, r2 + + // Save the dst register to r5 + mov r5, r0 + + // Zero out r4 + eor r4, r4, r4 + + // r1 contains the address of the string to count. +.L_strlen_start: + mov r0, r1 + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r1], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r1], #8 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r1, r0 + sub r3, r3, #1 + b .L_finish + +.L_zero_in_first_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_finish + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_finish + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_finish + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_finish + +.L_zero_in_second_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, r3, #1 + b .L_finish + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_finish + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_finish + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_finish: + cmp r4, #0 + bne .L_strlen_done + + // Time to get the dst string length. + mov r1, r5 + + // Save the original source address to r5. + mov r5, r0 + + // Save the current length (adding 1 for the terminator). + add r4, r3, #1 + b .L_strlen_start + + // r0 holds the pointer to the dst string. + // r3 holds the dst string length. + // r4 holds the src string length + 1. +.L_strlen_done: + add r2, r3, r4 + cmp r2, lr + bhi .L_strcat_chk_failed + + // Set up the registers for the memcpy code. + mov r1, r5 + pld [r1, #64] + mov r2, r4 + add r0, r0, r3 + pop {r4, r5} + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 + +#include MEMCPY_BASE + + // Undo the above cfi directives + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 +.L_strcat_chk_failed: + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcat_chk) + + .data +error_string: + .string "strcat: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S index db76686..d8cb3d9 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,155 +26,7 @@ * SUCH DAMAGE. */ -#include <private/bionic_asm.h> -#include <private/libc_events.h> +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .syntax unified - - .thumb - .thumb_func - -// Get the length of the source string first, then do a memcpy of the data -// instead of a strcpy. -ENTRY(__strcpy_chk) - pld [r0, #0] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - mov lr, r2 - mov r0, r1 - - ands r3, r1, #7 - beq .L_mainloop - - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq .L_align_to_32 - - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_32: - bcc .L_align_to_64 - ands ip, r3, #2 - beq .L_align_to_64 - - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - ldrb r2, [r0], #1 - cbz r2, .L_update_count_and_finish - -.L_align_to_64: - tst r3, #4 - beq .L_mainloop - ldr r3, [r0], #4 - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - - .p2align 2 -.L_mainloop: - ldrd r2, r3, [r0], #8 - - pld [r0, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .L_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - b .L_mainloop - -.L_update_count_and_finish: - sub r3, r0, r1 - sub r3, r3, #1 - b .L_check_size - -.L_zero_in_first_register: - sub r3, r0, r1 - lsls r2, ip, #17 - bne .L_sub8_and_finish - bcs .L_sub7_and_finish - lsls ip, ip, #1 - bne .L_sub6_and_finish - - sub r3, r3, #5 - b .L_check_size - -.L_sub8_and_finish: - sub r3, r3, #8 - b .L_check_size - -.L_sub7_and_finish: - sub r3, r3, #7 - b .L_check_size - -.L_sub6_and_finish: - sub r3, r3, #6 - b .L_check_size - -.L_zero_in_second_register: - sub r3, r0, r1 - lsls r2, ip, #17 - bne .L_sub4_and_finish - bcs .L_sub3_and_finish - lsls ip, ip, #1 - bne .L_sub2_and_finish - - sub r3, r3, #1 - b .L_check_size - -.L_sub4_and_finish: - sub r3, r3, #4 - b .L_check_size - -.L_sub3_and_finish: - sub r3, r3, #3 - b .L_check_size - -.L_sub2_and_finish: - sub r3, r3, #2 - -.L_check_size: - pld [r1, #0] - pld [r1, #64] - ldr r0, [sp] - cmp r3, lr - bhs __strcpy_chk_failed - - // Add 1 for copy length to get the string terminator. - add r2, r3, #1 -END(__strcpy_chk) - -#define MEMCPY_BASE __strcpy_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned -#include "memcpy_base.S" - -ENTRY_PRIVATE(__strcpy_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+4) -END(__strcpy_chk_failed) - - .data -error_string: - .string "strcpy: prevented write past end of buffer" +#include "__strcpy_chk_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S new file mode 100644 index 0000000..69ebcb4 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .syntax unified + + .thumb + .thumb_func + +// Get the length of the source string first, then do a memcpy of the data +// instead of a strcpy. +ENTRY(__strcpy_chk) + pld [r0, #0] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + + mov lr, r2 + mov r0, r1 + + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r0], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r0], #8 + + pld [r0, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r0, r1 + sub r3, r3, #1 + b .L_check_size + +.L_zero_in_first_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_check_size + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_check_size + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_check_size + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_check_size + +.L_zero_in_second_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, r3, #1 + b .L_check_size + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_check_size + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_check_size + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_check_size: + pld [r1, #0] + pld [r1, #64] + ldr r0, [sp] + cmp r3, lr + bhs .L_strcpy_chk_failed + + // Add 1 for copy length to get the string terminator. + add r2, r3, #1 + +#include MEMCPY_BASE + +.L_strcpy_chk_failed: + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcpy_chk) + + .data +error_string: + .string "strcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S index 410b663..537f3de 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,79 +25,8 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Prototype: void *memcpy (void *dst, const void *src, size_t count). - -#include <private/bionic_asm.h> -#include <private/libc_events.h> - - .text - .syntax unified - .fpu neon - -ENTRY(__memcpy_chk) - cmp r2, r3 - bhi __memcpy_chk_fail - - // Fall through to memcpy... -END(__memcpy_chk) - -ENTRY(memcpy) - pld [r1, #64] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 -END(memcpy) - -#define MEMCPY_BASE __memcpy_base -#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned -#include "memcpy_base.S" - -ENTRY_PRIVATE(__memcpy_chk_fail) - // Preserve lr for backtrace. - push {lr} - .cfi_def_cfa_offset 4 - .cfi_rel_offset lr, 0 - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - bl __fortify_chk_fail -error_code: - .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+8) -END(__memcpy_chk_fail) +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "memcpy_base.S" - .data -error_string: - .string "memcpy: prevented write past end of buffer" +#include "memcpy_common.S" diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S index 2a73852..aac737d 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S @@ -53,11 +53,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -ENTRY_PRIVATE(MEMCPY_BASE) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - +.L_memcpy_base: // Assumes that n >= 0, and dst, src are valid pointers. // For any sizes less than 832 use the neon code that doesn't // care about the src alignment. This avoids any checks @@ -168,12 +164,6 @@ ENTRY_PRIVATE(MEMCPY_BASE) eor r3, r0, r1 ands r3, r3, #0x3 bne .L_copy_unknown_alignment -END(MEMCPY_BASE) - -ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 // To try and improve performance, stack layout changed, // i.e., not keeping the stack looking like users expect @@ -185,7 +175,7 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) strd r6, r7, [sp, #-8]! .cfi_adjust_cfa_offset 8 .cfi_rel_offset r6, 0 - .cfi_rel_offset r7, 0 + .cfi_rel_offset r7, 4 strd r8, r9, [sp, #-8]! .cfi_adjust_cfa_offset 8 .cfi_rel_offset r8, 0 @@ -291,10 +281,28 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // Restore registers: optimized pop {r0, pc} ldrd r8, r9, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r8 + .cfi_restore r9 ldrd r6, r7, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r6 + .cfi_restore r7 ldrd r4, r5, [sp], #8 + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 pop {r0, pc} + // Put the cfi directives back for the below instructions. + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + .cfi_rel_offset r6, 8 + .cfi_rel_offset r7, 12 + .cfi_rel_offset r8, 16 + .cfi_rel_offset r9, 20 + .L_dst_not_word_aligned: // Align dst to word. rsb ip, ip, #4 @@ -315,4 +323,12 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // Src is guaranteed to be at least word aligned by this point. b .L_word_aligned -END(MEMCPY_BASE_ALIGNED) + + // Undo any cfi directives from above. + .cfi_adjust_cfa_offset -24 + .cfi_restore r4 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore r9 diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_common.S b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S new file mode 100644 index 0000000..464fb46 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + .text + .syntax unified + .fpu neon + +ENTRY(__memcpy_chk) + cmp r2, r3 + bhi .L_memcpy_chk_fail + + // Fall through to memcpy... +END(__memcpy_chk) + +// Prototype: void *memcpy (void *dst, const void *src, size_t count). +ENTRY(memcpy) + pld [r1, #64] + push {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + +#include MEMCPY_BASE + + // Undo the cfi instructions from above. + .cfi_def_cfa_offset 0 + .cfi_restore r0 + .cfi_restore lr +.L_memcpy_chk_fail: + // Preserve lr for backtrace. + push {lr} + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(memcpy) + + .data +error_string: + .string "memcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a15/bionic/strcat.S b/libc/arch-arm/cortex-a15/bionic/strcat.S index b95be94..157cc9f 100644 --- a/libc/arch-arm/cortex-a15/bionic/strcat.S +++ b/libc/arch-arm/cortex-a15/bionic/strcat.S @@ -70,7 +70,7 @@ .macro m_scan_byte ldrb r3, [r0] - cbz r3, strcat_r0_scan_done + cbz r3, .L_strcat_r0_scan_done add r0, #1 .endm // m_scan_byte @@ -84,10 +84,10 @@ ENTRY(strcat) // Quick check to see if src is empty. ldrb r2, [r1] pld [r1, #0] - cbnz r2, strcat_continue + cbnz r2, .L_strcat_continue bx lr -strcat_continue: +.L_strcat_continue: // To speed up really small dst strings, unroll checking the first 4 bytes. m_push m_scan_byte @@ -96,95 +96,102 @@ strcat_continue: m_scan_byte ands r3, r0, #7 - beq strcat_mainloop + beq .L_strcat_mainloop // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcat_align_to_32 + beq .L_strcat_align_to_32 ldrb r5, [r0] - cbz r5, strcat_r0_scan_done + cbz r5, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_32: - bcc strcat_align_to_64 +.L_strcat_align_to_32: + bcc .L_strcat_align_to_64 ldrb r2, [r0] - cbz r2, strcat_r0_scan_done + cbz r2, .L_strcat_r0_scan_done add r0, r0, #1 ldrb r4, [r0] - cbz r4, strcat_r0_scan_done + cbz r4, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_64: +.L_strcat_align_to_64: tst r3, #4 - beq strcat_mainloop + beq .L_strcat_mainloop ldr r3, [r0], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_r0_scan_done: +.L_strcat_r0_scan_done: // For short copies, hard-code checking the first 8 bytes since this // new code doesn't win until after about 8 bytes. - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue + +.L_strcpy_finish: m_pop -strcpy_continue: +.L_strcpy_continue: ands r3, r0, #7 - beq strcpy_check_src_align + beq .L_strcpy_check_src_align // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcpy_align_to_32 + beq .L_strcpy_align_to_32 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_32: - bcc strcpy_align_to_64 +.L_strcpy_align_to_32: + bcc .L_strcpy_align_to_64 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_64: +.L_strcpy_align_to_64: tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - str r2, [r0], #4 + beq .L_strcpy_check_src_align + // Read one byte at a time since we don't know the src alignment + // and we don't want to read into a different page. + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .L_strcpy_complete -strcpy_check_src_align: +.L_strcpy_check_src_align: // At this point dst is aligned to a double word, check if src // is also aligned to a double word. ands r3, r1, #7 - bne strcpy_unaligned_copy + bne .L_strcpy_unaligned_copy .p2align 2 -strcpy_mainloop: +.L_strcpy_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -192,128 +199,128 @@ strcpy_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_mainloop + b .L_strcpy_mainloop -strcpy_complete: +.L_strcpy_complete: m_pop -strcpy_zero_in_first_register: +.L_strcpy_zero_in_first_register: lsls lr, ip, #17 - bne strcpy_copy1byte - bcs strcpy_copy2bytes + bne .L_strcpy_copy1byte + bcs .L_strcpy_copy2bytes lsls ip, ip, #1 - bne strcpy_copy3bytes + bne .L_strcpy_copy3bytes -strcpy_copy4bytes: +.L_strcpy_copy4bytes: // Copy 4 bytes to the destiniation. str r2, [r0] m_pop -strcpy_copy1byte: +.L_strcpy_copy1byte: strb r2, [r0] m_pop -strcpy_copy2bytes: +.L_strcpy_copy2bytes: strh r2, [r0] m_pop -strcpy_copy3bytes: +.L_strcpy_copy3bytes: strh r2, [r0], #2 lsr r2, #16 strb r2, [r0] m_pop -strcpy_zero_in_second_register: +.L_strcpy_zero_in_second_register: lsls lr, ip, #17 - bne strcpy_copy5bytes - bcs strcpy_copy6bytes + bne .L_strcpy_copy5bytes + bcs .L_strcpy_copy6bytes lsls ip, ip, #1 - bne strcpy_copy7bytes + bne .L_strcpy_copy7bytes // Copy 8 bytes to the destination. strd r2, r3, [r0] m_pop -strcpy_copy5bytes: +.L_strcpy_copy5bytes: str r2, [r0], #4 strb r3, [r0] m_pop -strcpy_copy6bytes: +.L_strcpy_copy6bytes: str r2, [r0], #4 strh r3, [r0] m_pop -strcpy_copy7bytes: +.L_strcpy_copy7bytes: str r2, [r0], #4 strh r3, [r0], #2 lsr r3, #16 strb r3, [r0] m_pop -strcpy_unaligned_copy: +.L_strcpy_unaligned_copy: // Dst is aligned to a double word, while src is at an unknown alignment. // There are 7 different versions of the unaligned copy code // to prevent overreading the src. The mainloop of every single version // will store 64 bits per loop. The difference is how much of src can // be read without potentially crossing a page boundary. tbb [pc, r3] -strcpy_unaligned_branchtable: +.L_strcpy_unaligned_branchtable: .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2) .p2align 2 // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: +.L_strcpy_unalign7: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes + cbz r3, .L_strcpy_unalign7_copy5bytes ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes + cbz r4, .L_strcpy_unalign7_copy6bytes ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes + cbz r5, .L_strcpy_unalign7_copy7bytes ldr r3, [r1], #4 pld [r1, #64] lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign7 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign7 -strcpy_unalign7_copy5bytes: +.L_strcpy_unalign7_copy5bytes: str r2, [r0], #4 strb r3, [r0] -strcpy_unalign_return: +.L_strcpy_unalign_return: m_pop -strcpy_unalign7_copy6bytes: +.L_strcpy_unalign7_copy6bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 m_pop -strcpy_unalign7_copy7bytes: +.L_strcpy_unalign7_copy7bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 @@ -322,41 +329,41 @@ strcpy_unalign7_copy7bytes: .p2align 2 // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: +.L_strcpy_unalign6: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes + cbz r5, .L_strcpy_unalign_copy6bytes ldr r3, [r1], #4 pld [r1, #64] tst r3, #0xff0000 - beq strcpy_copy7bytes + beq .L_strcpy_copy7bytes lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign6 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign6 .p2align 2 // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: +.L_strcpy_unalign5: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldr r3, [r1], #4 @@ -365,17 +372,17 @@ strcpy_unalign5: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign5 + b .L_strcpy_unalign5 -strcpy_unalign_copy5bytes: +.L_strcpy_unalign_copy5bytes: str r2, [r0], #4 strb r4, [r0] m_pop -strcpy_unalign_copy6bytes: +.L_strcpy_unalign_copy6bytes: str r2, [r0], #4 strb r4, [r0], #1 strb r5, [r0] @@ -383,13 +390,13 @@ strcpy_unalign_copy6bytes: .p2align 2 // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: +.L_strcpy_unalign4: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldr r3, [r1], #4 pld [r1, #64] @@ -397,20 +404,20 @@ strcpy_unalign4: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign4 + b .L_strcpy_unalign4 .p2align 2 // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: +.L_strcpy_unalign3: ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte + cbz r2, .L_strcpy_unalign3_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes + cbz r3, .L_strcpy_unalign3_copy2bytes ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes + cbz r4, .L_strcpy_unalign3_copy3bytes ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -418,26 +425,26 @@ strcpy_unalign3: pld [r1, #64] lsrs lr, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign3 + b .L_strcpy_unalign3 -strcpy_unalign3_copy1byte: +.L_strcpy_unalign3_copy1byte: strb r2, [r0] m_pop -strcpy_unalign3_copy2bytes: +.L_strcpy_unalign3_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_pop -strcpy_unalign3_copy3bytes: +.L_strcpy_unalign3_copy3bytes: strb r2, [r0], #1 strb r3, [r0], #1 strb r4, [r0] @@ -445,34 +452,34 @@ strcpy_unalign3_copy3bytes: .p2align 2 // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: +.L_strcpy_unalign2: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldrb r4, [r1, #1] - cbz r4, strcpy_unalign_copy2bytes + cbz r4, .L_strcpy_unalign_copy2bytes ldr r2, [r1], #4 ldr r3, [r1], #4 pld [r1, #64] tst r2, #0xff0000 - beq strcpy_copy3bytes + beq .L_strcpy_copy3bytes lsrs ip, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign2 + b .L_strcpy_unalign2 .p2align 2 // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: +.L_strcpy_unalign1: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -482,27 +489,27 @@ strcpy_unalign1: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign1 + b .L_strcpy_unalign1 -strcpy_unalign_copy1byte: +.L_strcpy_unalign_copy1byte: strb r2, [r0] m_pop -strcpy_unalign_copy2bytes: +.L_strcpy_unalign_copy2bytes: strb r2, [r0], #1 strb r4, [r0] m_pop .p2align 2 -strcat_mainloop: +.L_strcat_mainloop: ldrd r2, r3, [r0], #8 pld [r0, #64] @@ -510,59 +517,59 @@ strcat_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcat_zero_in_first_register + bne .L_strcat_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_zero_in_first_register: +.L_strcat_zero_in_first_register: // Prefetch the src now, it's going to be used soon. pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub8 - bcs strcat_sub7 + bne .L_strcat_sub8 + bcs .L_strcat_sub7 lsls ip, ip, #1 - bne strcat_sub6 + bne .L_strcat_sub6 sub r0, r0, #5 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub8: +.L_strcat_sub8: sub r0, r0, #8 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub7: +.L_strcat_sub7: sub r0, r0, #7 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub6: +.L_strcat_sub6: sub r0, r0, #6 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_zero_in_second_register: +.L_strcat_zero_in_second_register: // Prefetch the src now, it's going to be used soon. pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub4 - bcs strcat_sub3 + bne .L_strcat_sub4 + bcs .L_strcat_sub3 lsls ip, ip, #1 - bne strcat_sub2 + bne .L_strcat_sub2 sub r0, r0, #1 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub4: +.L_strcat_sub4: sub r0, r0, #4 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub3: +.L_strcat_sub3: sub r0, r0, #3 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub2: +.L_strcat_sub2: sub r0, r0, #2 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done END(strcat) diff --git a/libc/arch-arm/cortex-a15/bionic/string_copy.S b/libc/arch-arm/cortex-a15/bionic/string_copy.S index 20f0e91..92d1c98 100644 --- a/libc/arch-arm/cortex-a15/bionic/string_copy.S +++ b/libc/arch-arm/cortex-a15/bionic/string_copy.S @@ -149,13 +149,20 @@ ENTRY(strcpy) .Lstringcopy_align_to_64: tst r3, #4 beq .Lstringcopy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .Lstringcopy_zero_in_first_register - str r2, [r0], #4 + // Read one byte at a time since we don't have any idea about the alignment + // of the source and we don't want to read into a different page. + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete .Lstringcopy_check_src_align: // At this point dst is aligned to a double word, check if src diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S index 9a0ce62..4fd6284 100644 --- a/libc/arch-arm/cortex-a15/bionic/strlen.S +++ b/libc/arch-arm/cortex-a15/bionic/strlen.S @@ -65,38 +65,38 @@ ENTRY(strlen) mov r1, r0 ands r3, r0, #7 - beq mainloop + beq .L_mainloop // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq align_to_32 + beq .L_align_to_32 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_32: - bcc align_to_64 +.L_align_to_32: + bcc .L_align_to_64 ands ip, r3, #2 - beq align_to_64 + beq .L_align_to_64 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_64: +.L_align_to_64: tst r3, #4 - beq mainloop + beq .L_mainloop ldr r3, [r1], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register + bne .L_zero_in_second_register .p2align 2 -mainloop: +.L_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -104,62 +104,62 @@ mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne zero_in_first_register + bne .L_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register - b mainloop + bne .L_zero_in_second_register + b .L_mainloop -update_count_and_return: +.L_update_count_and_return: sub r0, r1, r0 sub r0, r0, #1 bx lr -zero_in_first_register: +.L_zero_in_first_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub8_and_return - bcs sub7_and_return + bne .L_sub8_and_return + bcs .L_sub7_and_return lsls ip, ip, #1 - bne sub6_and_return + bne .L_sub6_and_return sub r0, r0, #5 bx lr -sub8_and_return: +.L_sub8_and_return: sub r0, r0, #8 bx lr -sub7_and_return: +.L_sub7_and_return: sub r0, r0, #7 bx lr -sub6_and_return: +.L_sub6_and_return: sub r0, r0, #6 bx lr -zero_in_second_register: +.L_zero_in_second_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub4_and_return - bcs sub3_and_return + bne .L_sub4_and_return + bcs .L_sub3_and_return lsls ip, ip, #1 - bne sub2_and_return + bne .L_sub2_and_return sub r0, r0, #1 bx lr -sub4_and_return: +.L_sub4_and_return: sub r0, r0, #4 bx lr -sub3_and_return: +.L_sub3_and_return: sub r0, r0, #3 bx lr -sub2_and_return: +.L_sub2_and_return: sub r0, r0, #2 bx lr END(strlen) diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk index 6fa3270..202a3bf 100644 --- a/libc/arch-arm/cortex-a15/cortex-a15.mk +++ b/libc/arch-arm/cortex-a15/cortex-a15.mk @@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a15/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ libc_bionic_src_files_arm += \ diff --git a/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk new file mode 100644 index 0000000..5d7efc6 --- /dev/null +++ b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk @@ -0,0 +1,22 @@ +# This file represents the best optimized routines that are the middle +# ground when running on a big/little system that is cortex-a57/cortex-a53. +# The cortex-a7 optimized routines, and the cortex-a53 optimized routines +# decrease performance on cortex-a57 processors by as much as 20%. + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/memset.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memcmp.S \ + arch-arm/generic/bionic/memchr.S + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S new file mode 100644 index 0000000..c5bc98a --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/__strcat_chk_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S new file mode 100644 index 0000000..1f8945d --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/__strcpy_chk_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy.S b/libc/arch-arm/cortex-a53/bionic/memcpy.S new file mode 100644 index 0000000..664f574 --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/memcpy.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// Indicate which memcpy base file to include. +#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S" + +#include "arch-arm/cortex-a15/bionic/memcpy_common.S" diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy_base.S b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S new file mode 100644 index 0000000..2749fc8 --- /dev/null +++ b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +.L_memcpy_base: + // Assumes that n >= 0, and dst, src are valid pointers. + cmp r2, #16 + blo .L_copy_less_than_16_unknown_align + +.L_copy_unknown_alignment: + // Unknown alignment of src and dst. + // Assumes that the first few bytes have already been prefetched. + + // Align destination to 128 bits. The mainloop store instructions + // require this alignment or they will throw an exception. + rsb r3, r0, #0 + ands r3, r3, #0xF + beq 2f + + // Copy up to 15 bytes (count in r3). + sub r2, r2, r3 + movs ip, r3, lsl #31 + + itt mi + ldrbmi lr, [r1], #1 + strbmi lr, [r0], #1 + itttt cs + ldrbcs ip, [r1], #1 + ldrbcs lr, [r1], #1 + strbcs ip, [r0], #1 + strbcs lr, [r0], #1 + + movs ip, r3, lsl #29 + bge 1f + // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after. + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! +1: bcc 2f + // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after. + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0, :64]! + +2: // Make sure we have at least 64 bytes to copy. + subs r2, r2, #64 + blo 2f + +1: // The main loop copies 64 bytes at a time. + vld1.8 {d0 - d3}, [r1]! + vld1.8 {d4 - d7}, [r1]! + subs r2, r2, #64 + vstmia r0!, {d0 - d7} + pld [r1, #(64*10)] + bhs 1b + +2: // Fix-up the remaining count and make sure we have >= 32 bytes left. + adds r2, r2, #32 + blo 3f + + // 32 bytes. These cache lines were already preloaded. + vld1.8 {d0 - d3}, [r1]! + sub r2, r2, #32 + vst1.8 {d0 - d3}, [r0, :128]! +3: // Less than 32 left. + add r2, r2, #32 + tst r2, #0x10 + beq .L_copy_less_than_16_unknown_align + // Copies 16 bytes, destination 128 bits aligned. + vld1.8 {d0, d1}, [r1]! + vst1.8 {d0, d1}, [r0, :128]! + +.L_copy_less_than_16_unknown_align: + // Copy up to 15 bytes (count in r2). + movs ip, r2, lsl #29 + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: bge 2f + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! + +2: // Copy 0 to 4 bytes. + lsls r2, r2, #31 + itt ne + ldrbne lr, [r1], #1 + strbne lr, [r0], #1 + itttt cs + ldrbcs ip, [r1], #1 + ldrbcs lr, [r1] + strbcs ip, [r0], #1 + strbcs lr, [r0] + + pop {r0, pc} diff --git a/libc/arch-arm/cortex-a53/cortex-a53.mk b/libc/arch-arm/cortex-a53/cortex-a53.mk index b5c337c..14aaa71 100644 --- a/libc/arch-arm/cortex-a53/cortex-a53.mk +++ b/libc/arch-arm/cortex-a53/cortex-a53.mk @@ -1 +1,21 @@ -include bionic/libc/arch-arm/cortex-a7/cortex-a7.mk +libc_bionic_src_files_arm += \ + arch-arm/cortex-a53/bionic/memcpy.S \ + arch-arm/cortex-a53/bionic/__strcat_chk.S \ + arch-arm/cortex-a53/bionic/__strcpy_chk.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a7/bionic/memset.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S \ + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a7/bionic/memset.S b/libc/arch-arm/cortex-a7/bionic/memset.S new file mode 100644 index 0000000..6365b06 --- /dev/null +++ b/libc/arch-arm/cortex-a7/bionic/memset.S @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/cpu-features.h> +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + /* + * Optimized memset() for ARM. + * + * memset() returns its first argument. + */ + + .fpu neon + .syntax unified + +ENTRY(__memset_chk) + cmp r2, r3 + bls .L_done + + // Preserve lr for backtrace. + push {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(__memset_chk) + +ENTRY(bzero) + mov r2, r1 + mov r1, #0 +.L_done: + // Fall through to memset... +END(bzero) + +ENTRY(memset) + mov r3, r0 + // At this point only d0, d1 are going to be used below. + vdup.8 q0, r1 + cmp r2, #16 + blo .L_set_less_than_16_unknown_align + +.L_check_alignment: + // Align destination to a double word to avoid the store crossing + // a cache line boundary. + ands ip, r3, #7 + bne .L_do_double_word_align + +.L_double_word_aligned: + // Duplicate since the less than 64 can use d2, d3. + vmov q1, q0 + subs r2, #64 + blo .L_set_less_than_64 + + // Duplicate the copy value so that we can store 64 bytes at a time. + vmov q2, q0 + vmov q3, q0 + +1: // Main loop stores 64 bytes at a time. + subs r2, #64 + vstmia r3!, {d0 - d7} + bge 1b + +.L_set_less_than_64: + // Restore r2 to the count of bytes left to set. + add r2, #64 + lsls ip, r2, #27 + bcc .L_set_less_than_32 + // Set 32 bytes. + vstmia r3!, {d0 - d3} + +.L_set_less_than_32: + bpl .L_set_less_than_16 + // Set 16 bytes. + vstmia r3!, {d0, d1} + +.L_set_less_than_16: + // Less than 16 bytes to set. + lsls ip, r2, #29 + bcc .L_set_less_than_8 + + // Set 8 bytes. + vstmia r3!, {d0} + +.L_set_less_than_8: + bpl .L_set_less_than_4 + // Set 4 bytes + vst1.32 {d0[0]}, [r3]! + +.L_set_less_than_4: + lsls ip, r2, #31 + it ne + strbne r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3] + bx lr + +.L_do_double_word_align: + rsb ip, ip, #8 + sub r2, r2, ip + + // Do this comparison now, otherwise we'll need to save a + // register to the stack since we've used all available + // registers. + cmp ip, #4 + blo 1f + + // Need to do a four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + vst1.32 {d0[0]}, [r3]! + b .L_double_word_aligned + +1: + // No four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + b .L_double_word_aligned + +.L_set_less_than_16_unknown_align: + // Set up to 15 bytes. + movs ip, r2, lsl #29 + bcc 1f + vst1.8 {d0}, [r3]! +1: bge 2f + vst1.32 {d0[0]}, [r3]! +2: movs ip, r2, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + bx lr +END(memset) + + .data +error_string: + .string "memset: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a7/cortex-a7.mk b/libc/arch-arm/cortex-a7/cortex-a7.mk index 9af03d9..3629a57 100644 --- a/libc/arch-arm/cortex-a7/cortex-a7.mk +++ b/libc/arch-arm/cortex-a7/cortex-a7.mk @@ -1 +1,19 @@ -include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk +libc_bionic_src_files_arm += \ + arch-arm/cortex-a7/bionic/memset.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S \ + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S index 5e81305..6ab5a69 100644 --- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S @@ -133,8 +133,7 @@ ENTRY_PRIVATE(MEMCPY_BASE) strbcs ip, [r0], #1 strbcs lr, [r0], #1 - ldmfd sp!, {r0, lr} - bx lr + ldmfd sp!, {r0, pc} END(MEMCPY_BASE) ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S index 8ee6ac2..b39fcc4 100644 --- a/libc/arch-arm/cortex-a9/bionic/memset.S +++ b/libc/arch-arm/cortex-a9/bionic/memset.S @@ -69,12 +69,9 @@ END(bzero) ENTRY(memset) // The neon memset only wins for less than 132. cmp r2, #132 - bhi __memset_large_copy - - stmfd sp!, {r0} - .cfi_def_cfa_offset 4 - .cfi_rel_offset r0, 0 + bhi .L_memset_large_copy + mov r3, r0 vdup.8 q0, r1 /* make sure we have at least 32 bytes to write */ @@ -84,7 +81,7 @@ ENTRY(memset) 1: /* The main loop writes 32 bytes at a time */ subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! + vst1.8 {d0 - d3}, [r3]! bhs 1b 2: /* less than 32 left */ @@ -93,22 +90,20 @@ ENTRY(memset) beq 3f // writes 16 bytes, 128-bits aligned - vst1.8 {d0, d1}, [r0]! + vst1.8 {d0, d1}, [r3]! 3: /* write up to 15-bytes (count in r2) */ movs ip, r2, lsl #29 bcc 1f - vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r3]! 1: bge 2f - vst1.32 {d0[0]}, [r0]! + vst1.32 {d0[0]}, [r3]! 2: movs ip, r2, lsl #31 - strbmi r1, [r0], #1 - strbcs r1, [r0], #1 - strbcs r1, [r0], #1 - ldmfd sp!, {r0} + strbmi r1, [r3], #1 + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 bx lr -END(memset) -ENTRY_PRIVATE(__memset_large_copy) +.L_memset_large_copy: /* compute the offset to align the destination * offset = (4-(src&3))&3 = -src & 3 */ @@ -136,8 +131,7 @@ ENTRY_PRIVATE(__memset_large_copy) strbcs r1, [r0], #1 strbmi r1, [r0], #1 subs r2, r2, r3 - popls {r0, r4-r7, lr} /* return */ - bxls lr + popls {r0, r4-r7, pc} /* return */ /* align the destination to a cache-line */ mov r12, r1 @@ -180,9 +174,8 @@ ENTRY_PRIVATE(__memset_large_copy) strhmi r1, [r0], #2 movs r2, r2, lsl #2 strbcs r1, [r0] - ldmfd sp!, {r0, r4-r7, lr} - bx lr -END(__memset_large_copy) + ldmfd sp!, {r0, r4-r7, pc} +END(memset) .data error_string: diff --git a/libc/arch-arm/cortex-a9/bionic/strcat.S b/libc/arch-arm/cortex-a9/bionic/strcat.S index f5a855e..9077a74 100644 --- a/libc/arch-arm/cortex-a9/bionic/strcat.S +++ b/libc/arch-arm/cortex-a9/bionic/strcat.S @@ -70,7 +70,7 @@ .macro m_scan_byte ldrb r3, [r0] - cbz r3, strcat_r0_scan_done + cbz r3, .Lstrcat_r0_scan_done add r0, #1 .endm // m_scan_byte @@ -84,10 +84,10 @@ ENTRY(strcat) // Quick check to see if src is empty. ldrb r2, [r1] pld [r1, #0] - cbnz r2, strcat_continue + cbnz r2, .Lstrcat_continue bx lr -strcat_continue: +.Lstrcat_continue: // To speed up really small dst strings, unroll checking the first 4 bytes. m_push m_scan_byte @@ -96,10 +96,10 @@ strcat_continue: m_scan_byte ands r3, r0, #7 - bne strcat_align_src + bne .Lstrcat_align_src .p2align 2 -strcat_mainloop: +.Lstrcat_mainloop: ldmia r0!, {r2, r3} pld [r0, #64] @@ -107,28 +107,28 @@ strcat_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcat_zero_in_first_register + bne .Lstrcat_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .Lstrcat_zero_in_second_register + b .Lstrcat_mainloop -strcat_zero_in_first_register: +.Lstrcat_zero_in_first_register: sub r0, r0, #4 -strcat_zero_in_second_register: +.Lstrcat_zero_in_second_register: // Check for zero in byte 0. tst ip, #0x80 it ne subne r0, r0, #4 - bne strcat_r0_scan_done + bne .Lstrcat_r0_scan_done // Check for zero in byte 1. tst ip, #0x8000 it ne subne r0, r0, #3 - bne strcat_r0_scan_done + bne .Lstrcat_r0_scan_done // Check for zero in byte 2. tst ip, #0x800000 it ne @@ -137,33 +137,33 @@ strcat_zero_in_second_register: // Zero is in byte 3. subeq r0, r0, #1 -strcat_r0_scan_done: +.Lstrcat_r0_scan_done: // Unroll the first 8 bytes that will be copied. - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: + m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue + +.Lstrcpy_finish: m_ret inst=pop -strcpy_continue: +.Lstrcpy_continue: pld [r1, #0] ands r3, r0, #7 - bne strcpy_align_dst + bne .Lstrcpy_align_dst -strcpy_check_src_align: +.Lstrcpy_check_src_align: // At this point dst is aligned to a double word, check if src // is also aligned to a double word. ands r3, r1, #7 - bne strcpy_unaligned_copy + bne .Lstrcpy_unaligned_copy .p2align 2 -strcpy_mainloop: +.Lstrcpy_mainloop: ldmia r1!, {r2, r3} pld [r1, #64] @@ -171,17 +171,17 @@ strcpy_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_mainloop + b .Lstrcpy_mainloop -strcpy_zero_in_first_register: +.Lstrcpy_zero_in_first_register: lsls lr, ip, #17 itt ne strbne r2, [r0] @@ -198,7 +198,7 @@ strcpy_zero_in_first_register: strb r3, [r0] m_ret inst=pop -strcpy_zero_in_second_register: +.Lstrcpy_zero_in_second_register: lsls lr, ip, #17 ittt ne stmiane r0!, {r2} @@ -218,18 +218,18 @@ strcpy_zero_in_second_register: strb r4, [r0] m_ret inst=pop -strcpy_align_dst: +.Lstrcpy_align_dst: // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcpy_align_to_32 + beq .Lstrcpy_align_to_32 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .Lstrcpy_complete -strcpy_align_to_32: - bcc strcpy_align_to_64 +.Lstrcpy_align_to_32: + bcc .Lstrcpy_align_to_64 ldrb r4, [r1], #1 strb r4, [r0], #1 @@ -242,76 +242,83 @@ strcpy_align_to_32: it eq m_ret inst=popeq -strcpy_align_to_64: +.Lstrcpy_align_to_64: tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - stmia r0!, {r2} - b strcpy_check_src_align + beq .Lstrcpy_check_src_align + // Read one byte at a time since we don't know the src alignment + // and we don't want to read into a different page. + ldrb r4, [r1], #1 + strb r4, [r0], #1 + cbz r4, .Lstrcpy_complete + ldrb r5, [r1], #1 + strb r5, [r0], #1 + cbz r5, .Lstrcpy_complete + ldrb r4, [r1], #1 + strb r4, [r0], #1 + cbz r4, .Lstrcpy_complete + ldrb r5, [r1], #1 + strb r5, [r0], #1 + cbz r5, .Lstrcpy_complete + b .Lstrcpy_check_src_align -strcpy_complete: +.Lstrcpy_complete: m_ret inst=pop -strcpy_unaligned_copy: +.Lstrcpy_unaligned_copy: // Dst is aligned to a double word, while src is at an unknown alignment. // There are 7 different versions of the unaligned copy code // to prevent overreading the src. The mainloop of every single version // will store 64 bits per loop. The difference is how much of src can // be read without potentially crossing a page boundary. tbb [pc, r3] -strcpy_unaligned_branchtable: +.Lstrcpy_unaligned_branchtable: .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2) + .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2) .p2align 2 // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: +.Lstrcpy_unalign7: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes + cbz r3, .Lstrcpy_unalign7_copy5bytes ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes + cbz r4, .Lstrcpy_unalign7_copy6bytes ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes + cbz r5, .Lstrcpy_unalign7_copy7bytes ldr r3, [r1], #4 pld [r1, #64] lsrs ip, r3, #24 stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign7 + beq .Lstrcpy_unalign_return + b .Lstrcpy_unalign7 -strcpy_unalign7_copy5bytes: +.Lstrcpy_unalign7_copy5bytes: stmia r0!, {r2} strb r3, [r0] -strcpy_unalign_return: +.Lstrcpy_unalign_return: m_ret inst=pop -strcpy_unalign7_copy6bytes: +.Lstrcpy_unalign7_copy6bytes: stmia r0!, {r2} strb r3, [r0], #1 strb r4, [r0], #1 m_ret inst=pop -strcpy_unalign7_copy7bytes: +.Lstrcpy_unalign7_copy7bytes: stmia r0!, {r2} strb r3, [r0], #1 strb r4, [r0], #1 @@ -320,30 +327,30 @@ strcpy_unalign7_copy7bytes: .p2align 2 // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: +.Lstrcpy_unalign6: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .Lstrcpy_unalign_copy5bytes ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes + cbz r5, .Lstrcpy_unalign_copy6bytes ldr r3, [r1], #4 pld [r1, #64] tst r3, #0xff0000 - beq strcpy_unalign6_copy7bytes + beq .Lstrcpy_unalign6_copy7bytes lsrs ip, r3, #24 stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign6 + beq .Lstrcpy_unalign_return + b .Lstrcpy_unalign6 -strcpy_unalign6_copy7bytes: +.Lstrcpy_unalign6_copy7bytes: stmia r0!, {r2} strh r3, [r0], #2 lsr r3, #16 @@ -352,16 +359,16 @@ strcpy_unalign6_copy7bytes: .p2align 2 // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: +.Lstrcpy_unalign5: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .Lstrcpy_unalign_copy5bytes ldr r3, [r1], #4 @@ -370,17 +377,17 @@ strcpy_unalign5: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign5 + b .Lstrcpy_unalign5 -strcpy_unalign_copy5bytes: +.Lstrcpy_unalign_copy5bytes: stmia r0!, {r2} strb r4, [r0] m_ret inst=pop -strcpy_unalign_copy6bytes: +.Lstrcpy_unalign_copy6bytes: stmia r0!, {r2} strb r4, [r0], #1 strb r5, [r0] @@ -388,13 +395,13 @@ strcpy_unalign_copy6bytes: .p2align 2 // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: +.Lstrcpy_unalign4: ldmia r1!, {r2} sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register ldmia r1!, {r3} pld [r1, #64] @@ -402,20 +409,20 @@ strcpy_unalign4: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign4 + b .Lstrcpy_unalign4 .p2align 2 // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: +.Lstrcpy_unalign3: ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte + cbz r2, .Lstrcpy_unalign3_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes + cbz r3, .Lstrcpy_unalign3_copy2bytes ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes + cbz r4, .Lstrcpy_unalign3_copy3bytes ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -423,26 +430,26 @@ strcpy_unalign3: pld [r1, #64] lsrs lr, r2, #24 - beq strcpy_unalign_copy4bytes + beq .Lstrcpy_unalign_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign3 + b .Lstrcpy_unalign3 -strcpy_unalign3_copy1byte: +.Lstrcpy_unalign3_copy1byte: strb r2, [r0] m_ret inst=pop -strcpy_unalign3_copy2bytes: +.Lstrcpy_unalign3_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_ret inst=pop -strcpy_unalign3_copy3bytes: +.Lstrcpy_unalign3_copy3bytes: strb r2, [r0], #1 strb r3, [r0], #1 strb r4, [r0] @@ -450,34 +457,34 @@ strcpy_unalign3_copy3bytes: .p2align 2 // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: +.Lstrcpy_unalign2: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .Lstrcpy_unalign_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign_copy2bytes + cbz r3, .Lstrcpy_unalign_copy2bytes ldr r2, [r1], #4 ldr r3, [r1], #4 pld [r1, #64] tst r2, #0xff0000 - beq strcpy_unalign_copy3bytes + beq .Lstrcpy_unalign_copy3bytes lsrs ip, r2, #24 - beq strcpy_unalign_copy4bytes + beq .Lstrcpy_unalign_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign2 + b .Lstrcpy_unalign2 .p2align 2 // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: +.Lstrcpy_unalign1: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .Lstrcpy_unalign_copy1byte ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -487,62 +494,62 @@ strcpy_unalign1: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .Lstrcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .Lstrcpy_zero_in_second_register stmia r0!, {r2, r3} - b strcpy_unalign1 + b .Lstrcpy_unalign1 -strcpy_unalign_copy1byte: +.Lstrcpy_unalign_copy1byte: strb r2, [r0] m_ret inst=pop -strcpy_unalign_copy2bytes: +.Lstrcpy_unalign_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_ret inst=pop -strcpy_unalign_copy3bytes: +.Lstrcpy_unalign_copy3bytes: strh r2, [r0], #2 lsr r2, #16 strb r2, [r0] m_ret inst=pop -strcpy_unalign_copy4bytes: +.Lstrcpy_unalign_copy4bytes: stmia r0, {r2} m_ret inst=pop -strcat_align_src: +.Lstrcat_align_src: // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcat_align_to_32 + beq .Lstrcat_align_to_32 ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update -strcat_align_to_32: - bcc strcat_align_to_64 +.Lstrcat_align_to_32: + bcc .Lstrcat_align_to_64 ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update ldrb r2, [r0], #1 - cbz r2, strcat_r0_update + cbz r2, .Lstrcat_r0_update -strcat_align_to_64: +.Lstrcat_align_to_64: tst r3, #4 - beq strcat_mainloop + beq .Lstrcat_mainloop ldr r3, [r0], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .Lstrcat_zero_in_second_register + b .Lstrcat_mainloop -strcat_r0_update: +.Lstrcat_r0_update: sub r0, r0, #1 - b strcat_r0_scan_done + b .Lstrcat_r0_scan_done END(strcat) diff --git a/libc/arch-arm/cortex-a9/bionic/string_copy.S b/libc/arch-arm/cortex-a9/bionic/string_copy.S index caf5a11..642db0f 100644 --- a/libc/arch-arm/cortex-a9/bionic/string_copy.S +++ b/libc/arch-arm/cortex-a9/bionic/string_copy.S @@ -244,13 +244,20 @@ ENTRY(strcpy) .Lstringcopy_align_to_64: tst r3, #4 beq .Lstringcopy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .Lstringcopy_zero_in_first_register - stmia r0!, {r2} + // Read one byte at a time since we don't have any idea about the alignment + // of the source and we don't want to read into a different page. + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete b .Lstringcopy_check_src_align .Lstringcopy_complete: diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk index 7b38de1..db4bcc7 100644 --- a/libc/arch-arm/cortex-a9/cortex-a9.mk +++ b/libc/arch-arm/cortex-a9/cortex-a9.mk @@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a9/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ libc_bionic_src_files_arm += \ diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk index 5fddf95..e81f8c7 100644 --- a/libc/arch-arm/denver/denver.mk +++ b/libc/arch-arm/denver/denver.mk @@ -1,4 +1,5 @@ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ arch-arm/denver/bionic/memcpy.S \ arch-arm/denver/bionic/memmove.S \ diff --git a/libc/arch-arm/generic/bionic/memchr.S b/libc/arch-arm/generic/bionic/memchr.S new file mode 100644 index 0000000..cb00d82 --- /dev/null +++ b/libc/arch-arm/generic/bionic/memchr.S @@ -0,0 +1,155 @@ +/* Copyright (c) 2010-2015, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + Written by Dave Gilbert <david.gilbert@linaro.org> + + This memchr routine is optimised on a Cortex-A9 and should work on + all ARMv7 processors. It has a fast past for short sizes, and has + an optimised path for large data sets; the worst case is finding the + match early in a large data set. + + */ + +#include <private/bionic_asm.h> + +@ 2011-02-07 david.gilbert@linaro.org +@ Extracted from local git a5b438d861 +@ 2011-07-14 david.gilbert@linaro.org +@ Import endianness fix from local git ea786f1b +@ 2011-12-07 david.gilbert@linaro.org +@ Removed unneeded cbz from align loop + + .syntax unified + .arch armv7-a + +@ this lets us check a flag in a 00/ff byte easily in either endianness +#ifdef __ARMEB__ +#define CHARTSTMASK(c) 1<<(31-(c*8)) +#else +#define CHARTSTMASK(c) 1<<(c*8) +#endif + .text + .thumb + +@ --------------------------------------------------------------------------- + .thumb_func +ENTRY(memchr) + .p2align 4,,15 + @ r0 = start of memory to scan + @ r1 = character to look for + @ r2 = length + @ returns r0 = pointer to character or NULL if not found + and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char + + cmp r2,#16 @ If it's short don't bother with anything clever + blt 20f + + tst r0, #7 @ If it's already aligned skip the next bit + beq 10f + + @ Work up to an aligned point +5: + ldrb r3, [r0],#1 + subs r2, r2, #1 + cmp r3, r1 + beq 50f @ If it matches exit found + tst r0, #7 + bne 5b @ If not aligned yet then do next byte + +10: + @ At this point, we are aligned, we know we have at least 8 bytes to work with + push {r4,r5,r6,r7} + orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes + orr r1, r1, r1, lsl #16 + bic r4, r2, #7 @ Number of double words to work with + mvns r7, #0 @ all F's + movs r3, #0 + +15: + ldrd r5,r6,[r0],#8 + subs r4, r4, #8 + eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target + eor r6,r6, r1 + uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION + cbnz r6, 60f + bne 15b @ (Flags from the subs above) If not run out of bytes then go around again + + pop {r4,r5,r6,r7} + and r1,r1,#0xff @ Get r1 back to a single character from the expansion above + and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done + +20: + cbz r2, 40f @ 0 length or hit the end already then not found + +21: @ Post aligned section, or just a short call + ldrb r3,[r0],#1 + subs r2,r2,#1 + eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub + cbz r3, 50f + bne 21b @ on r2 flags + +40: + movs r0,#0 @ not found + bx lr + +50: + subs r0,r0,#1 @ found + bx lr + +60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was + @ r0 points to the start of the double word after the one that was tested + @ r5 has the 00/ff pattern for the first word, r6 has the chained value + cmp r5, #0 + itte eq + moveq r5, r6 @ the end is in the 2nd word + subeq r0,r0,#3 @ Points to 2nd byte of 2nd word + subne r0,r0,#7 @ or 2nd byte of 1st word + + @ r0 currently points to the 3rd byte of the word containing the hit + tst r5, # CHARTSTMASK(0) @ 1st character + bne 61f + adds r0,r0,#1 + tst r5, # CHARTSTMASK(1) @ 2nd character + ittt eq + addeq r0,r0,#1 + tsteq r5, # (3<<15) @ 2nd & 3rd character + @ If not the 3rd must be the last one + addeq r0,r0,#1 + +61: + pop {r4,r5,r6,r7} + subs r0,r0,#1 + bx lr +END(memchr) diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S index c78dbd4..6643d55 100644 --- a/libc/arch-arm/generic/bionic/memcmp.S +++ b/libc/arch-arm/generic/bionic/memcmp.S @@ -221,8 +221,7 @@ ENTRY(memcmp) bne 8b 9: /* restore registers and return */ - ldmfd sp!, {r4, lr} - bx lr + ldmfd sp!, {r4, pc} 10: /* process less than 12 bytes */ cmp r2, #0 diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S index ea5a399..65cba4c 100644 --- a/libc/arch-arm/generic/bionic/memcpy.S +++ b/libc/arch-arm/generic/bionic/memcpy.S @@ -194,8 +194,7 @@ ENTRY(memcpy) /* we're done! restore everything and return */ 1: ldmfd sp!, {r5-r11} - ldmfd sp!, {r0, r4, lr} - bx lr + ldmfd sp!, {r0, r4, pc} /********************************************************************/ @@ -385,8 +384,7 @@ ENTRY(memcpy) /* we're done! restore sp and spilled registers and return */ add sp, sp, #28 - ldmfd sp!, {r0, r4, lr} - bx lr + ldmfd sp!, {r0, r4, pc} END(memcpy) // Only reached when the __memcpy_chk check fails. diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S index d17a9c4..b8eabbf 100644 --- a/libc/arch-arm/generic/bionic/memset.S +++ b/libc/arch-arm/generic/bionic/memset.S @@ -82,8 +82,7 @@ ENTRY(memset) strbcs r1, [r0], #1 strbmi r1, [r0], #1 subs r2, r2, r3 - popls {r0, r4-r7, lr} /* return */ - bxls lr + popls {r0, r4-r7, pc} /* return */ /* align the destination to a cache-line */ mov r12, r1 @@ -126,8 +125,7 @@ ENTRY(memset) strhmi r1, [r0], #2 movs r2, r2, lsl #2 strbcs r1, [r0] - ldmfd sp!, {r0, r4-r7, lr} - bx lr + ldmfd sp!, {r0, r4-r7, pc} END(memset) .data diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk index e49d6d2..016c882 100644 --- a/libc/arch-arm/generic/generic.mk +++ b/libc/arch-arm/generic/generic.mk @@ -1,4 +1,5 @@ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ arch-arm/generic/bionic/memcpy.S \ arch-arm/generic/bionic/memset.S \ diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S index 246f159..1a39c5b 100644 --- a/libc/arch-arm/krait/bionic/__strcat_chk.S +++ b/libc/arch-arm/krait/bionic/__strcat_chk.S @@ -40,7 +40,7 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} @@ -177,7 +177,7 @@ ENTRY(__strcat_chk) .L_strlen_done: add r2, r3, r4 cmp r2, lr - bhi __strcat_chk_failed + bhi .L_strcat_chk_failed // Set up the registers for the memcpy code. mov r1, r5 @@ -185,20 +185,17 @@ ENTRY(__strcat_chk) mov r2, r4 add r0, r0, r3 pop {r4, r5} -END(__strcat_chk) + .cfi_adjust_cfa_offset -8 + .cfi_restore r4 + .cfi_restore r5 -#define MEMCPY_BASE __strcat_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__strcat_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 + // Undo the above cfi directives. .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 - +.L_strcat_chk_failed: ldr r0, error_message ldr r1, error_code 1: @@ -208,7 +205,7 @@ error_code: .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__strcat_chk_failed) +END(__strcat_chk) .data error_string: diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S index db76686..00202f3 100644 --- a/libc/arch-arm/krait/bionic/__strcpy_chk.S +++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S @@ -39,7 +39,7 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -149,21 +149,14 @@ ENTRY(__strcpy_chk) pld [r1, #64] ldr r0, [sp] cmp r3, lr - bhs __strcpy_chk_failed + bhs .L_strcpy_chk_failed // Add 1 for copy length to get the string terminator. add r2, r3, #1 -END(__strcpy_chk) -#define MEMCPY_BASE __strcpy_chk_memcpy_base -#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__strcpy_chk_failed) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - +.L_strcpy_chk_failed: ldr r0, error_message ldr r1, error_code 1: @@ -173,7 +166,7 @@ error_code: .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__strcpy_chk_failed) +END(__strcpy_chk) .data error_string: diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S index 9ff46a8..5d27b57 100644 --- a/libc/arch-arm/krait/bionic/memcpy.S +++ b/libc/arch-arm/krait/bionic/memcpy.S @@ -45,7 +45,7 @@ ENTRY(__memcpy_chk) cmp r2, r3 - bhi __memcpy_chk_fail + bhi .L_memcpy_chk_fail // Fall through to memcpy... END(__memcpy_chk) @@ -53,19 +53,20 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #64] stmfd sp!, {r0, lr} - .cfi_def_cfa_offset 8 + .cfi_adjust_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 -END(memcpy) -#define MEMCPY_BASE __memcpy_base -#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned #include "memcpy_base.S" -ENTRY_PRIVATE(__memcpy_chk_fail) + // Undo the cfi directives from above. + .cfi_adjust_cfa_offset -8 + .cfi_restore r0 + .cfi_restore lr +.L_memcpy_chk_fail: // Preserve lr for backtrace. push {lr} - .cfi_def_cfa_offset 4 + .cfi_adjust_cfa_offset 4 .cfi_rel_offset lr, 0 ldr r0, error_message @@ -77,7 +78,7 @@ error_code: .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW error_message: .word error_string-(1b+4) -END(__memcpy_chk_fail) +END(memcpy) .data error_string: diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S index 035dcf1..76c5a84 100644 --- a/libc/arch-arm/krait/bionic/memcpy_base.S +++ b/libc/arch-arm/krait/bionic/memcpy_base.S @@ -1,123 +1,191 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -/* - * This code assumes it is running on a processor that supports all arm v7 - * instructions, that supports neon instructions, and that has a 32 byte - * cache line. - */ - -// Assumes neon instructions and a cache line size of 32 bytes. - -ENTRY_PRIVATE(MEMCPY_BASE) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - /* do we have at least 16-bytes to copy (needed for alignment below) */ - cmp r2, #16 - blo 5f - - /* align destination to cache-line for the write-buffer */ - rsb r3, r0, #0 - ands r3, r3, #0xF - beq 2f - - /* copy up to 15-bytes (count in r3) */ - sub r2, r2, r3 - movs ip, r3, lsl #31 - itt mi - ldrbmi lr, [r1], #1 - strbmi lr, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1], #1 - strbcs ip, [r0], #1 - strbcs lr, [r0], #1 - movs ip, r3, lsl #29 - bge 1f - // copies 4 bytes, destination 32-bits aligned - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! -1: bcc 2f - // copies 8 bytes, destination 64-bits aligned - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0, :64]! - -2: /* make sure we have at least 64 bytes to copy */ - subs r2, r2, #64 - blo 2f - -1: /* The main loop copies 64 bytes at a time */ - vld1.8 {d0 - d3}, [r1]! - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(32*8)] - subs r2, r2, #64 - vst1.8 {d0 - d3}, [r0, :128]! - vst1.8 {d4 - d7}, [r0, :128]! - bhs 1b - -2: /* fix-up the remaining count and make sure we have >= 32 bytes left */ - adds r2, r2, #32 - blo 4f - - /* Copy 32 bytes. These cache lines were already preloaded */ - vld1.8 {d0 - d3}, [r1]! - sub r2, r2, #32 - vst1.8 {d0 - d3}, [r0, :128]! - -4: /* less than 32 left */ - add r2, r2, #32 - tst r2, #0x10 - beq 5f - // copies 16 bytes, 128-bits aligned - vld1.8 {d0, d1}, [r1]! - vst1.8 {d0, d1}, [r0, :128]! - -5: /* copy up to 15-bytes (count in r2) */ - movs ip, r2, lsl #29 - bcc 1f - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0]! -1: bge 2f - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! -2: movs ip, r2, lsl #31 - itt mi - ldrbmi r3, [r1], #1 - strbmi r3, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1], #1 - strbcs ip, [r0], #1 - strbcs lr, [r0], #1 - - ldmfd sp!, {r0, lr} - bx lr -END(MEMCPY_BASE) +/*************************************************************************** + Copyright (c) 2009-2013 The Linux Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of The Linux Foundation nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/* Assumes neon instructions and a cache line size of 64 bytes. */ + +#include <machine/cpu-features.h> +#include <machine/asm.h> + +#define PLDOFFS (10) +#define PLDTHRESH (PLDOFFS) +#define BBTHRESH (4096/64) +#define PLDSIZE (64) + +#if (PLDOFFS < 1) +#error Routine does not support offsets less than 1 +#endif + +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif + + .text + .fpu neon + +.L_memcpy_base: + cmp r2, #4 + blt .L_neon_lt4 + cmp r2, #16 + blt .L_neon_lt16 + cmp r2, #32 + blt .L_neon_16 + cmp r2, #64 + blt .L_neon_copy_32_a + + mov r12, r2, lsr #6 + cmp r12, #PLDTHRESH + ble .L_neon_copy_64_loop_nopld + + push {r9, r10} + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r9, 0 + .cfi_rel_offset r10, 4 + + cmp r12, #BBTHRESH + ble .L_neon_prime_pump + + add lr, r0, #0x400 + add r9, r1, #(PLDOFFS*PLDSIZE) + sub lr, lr, r9 + lsl lr, lr, #21 + lsr lr, lr, #21 + add lr, lr, #(PLDOFFS*PLDSIZE) + cmp r12, lr, lsr #6 + ble .L_neon_prime_pump + + itt gt + movgt r9, #(PLDOFFS) + rsbsgt r9, r9, lr, lsr #6 + ble .L_neon_prime_pump + + add r10, r1, lr + bic r10, #0x3F + + sub r12, r12, lr, lsr #6 + + cmp r9, r12 + itee le + suble r12, r12, r9 + movgt r9, r12 + movgt r12, #0 + + pld [r1, #((PLDOFFS-1)*PLDSIZE)] +.L_neon_copy_64_loop_outer_doublepld: + pld [r1, #((PLDOFFS)*PLDSIZE)] + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r9, r9, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .L_neon_copy_64_loop_outer_doublepld + cmp r12, #0 + beq .L_neon_pop_before_nopld + + cmp r12, #(512*1024/64) + blt .L_neon_copy_64_loop_outer + +.L_neon_copy_64_loop_ddr: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + pld [r10] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .L_neon_copy_64_loop_ddr + b .L_neon_pop_before_nopld + +.L_neon_prime_pump: + mov lr, #(PLDOFFS*PLDSIZE) + add r10, r1, #(PLDOFFS*PLDSIZE) + bic r10, #0x3F + sub r12, r12, #PLDOFFS + ldr r3, [r10, #(-1*PLDSIZE)] + +.L_neon_copy_64_loop_outer: + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1]! + ldr r3, [r10] + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0]! + add r10, #64 + bne .L_neon_copy_64_loop_outer + +.L_neon_pop_before_nopld: + mov r12, lr, lsr #6 + pop {r9, r10} + .cfi_adjust_cfa_offset -8 + .cfi_restore r9 + .cfi_restore r10 + +.L_neon_copy_64_loop_nopld: + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1]! + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + bne .L_neon_copy_64_loop_nopld + ands r2, r2, #0x3f + beq .L_neon_exit + +.L_neon_copy_32_a: + movs r3, r2, lsl #27 + bcc .L_neon_16 + vld1.32 {q0,q1}, [r1]! + vst1.32 {q0,q1}, [r0]! + +.L_neon_16: + bpl .L_neon_lt16 + vld1.32 {q8}, [r1]! + vst1.32 {q8}, [r0]! + ands r2, r2, #0x0f + beq .L_neon_exit + +.L_neon_lt16: + movs r3, r2, lsl #29 + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: + bge .L_neon_lt4 + vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! + vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! + +.L_neon_lt4: + movs r2, r2, lsl #31 + itt cs + ldrhcs r3, [r1], #2 + strhcs r3, [r0], #2 + itt mi + ldrbmi r3, [r1] + strbmi r3, [r0] + +.L_neon_exit: + pop {r0, pc} diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S new file mode 100644 index 0000000..aea7315 --- /dev/null +++ b/libc/arch-arm/krait/bionic/memmove.S @@ -0,0 +1,219 @@ +/*************************************************************************** + Copyright (c) 2009-2014 The Linux Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of The Linux Foundation nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/*************************************************************************** + * Neon memmove: Attempts to do a memmove with Neon registers if possible, + * Inputs: + * dest: The destination buffer + * src: The source buffer + * n: The size of the buffer to transfer + * Outputs: + * + ***************************************************************************/ + +#include <private/bionic_asm.h> +#include <private/libc_events.h> +/* + * These can be overridden in: + * device/<vendor>/<board>/BoardConfig.mk + * by setting the following: + * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true + * TARGET_USE_KRAIT_PLD_SET := true + * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset> + * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize> + * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold> + */ +#ifndef PLDOFFS +#define PLDOFFS (10) +#endif +#ifndef PLDTHRESH +#define PLDTHRESH (PLDOFFS) +#endif +#if (PLDOFFS < 5) +#error Routine does not support offsets less than 5 +#endif +#if (PLDTHRESH < PLDOFFS) +#error PLD threshold must be greater than or equal to the PLD offset +#endif +#ifndef PLDSIZE +#define PLDSIZE (64) +#endif + + .text + .syntax unified + .fpu neon + .thumb + .thumb_func + +//ENTRY(bcopy) +// //.cfi_startproc +// mov r12, r0 +// mov r0, r1 +// mov r1, r12 +// // Fall through to memmove +// //.cfi_endproc +//END(bcopy) + +ENTRY(memmove) +_memmove_words: + //.cfi_startproc + .save {r0, lr} + cmp r2, #0 + it ne + subsne r12, r0, r1 // Warning: do not combine these "it" blocks + it eq + bxeq lr +// memmove only if r1 < r0 < r1+r2 + cmp r0, r1 + itt ge + addge r12, r1, r2 + cmpge r12, r0 + it le + ble memcpy + cmp r2, #4 + it le + ble .Lneon_b2f_smallcopy_loop + push {r0, lr} + add r0, r0, r2 + add r1, r1, r2 + cmp r2, #64 + it ge + bge .Lneon_b2f_copy_64 + cmp r2, #32 + it ge + bge .Lneon_b2f_copy_32 + cmp r2, #8 + it ge + bge .Lneon_b2f_copy_8 + b .Lneon_b2f_copy_1 +.Lneon_b2f_copy_64: + mov r12, r2, lsr #6 + add r0, r0, #32 + add r1, r1, #32 + cmp r12, #PLDTHRESH + it le + ble .Lneon_b2f_copy_64_loop_nopld + sub r12, #PLDOFFS + sub lr, r1, #(PLDOFFS)*PLDSIZE +.Lneon_b2f_copy_64_loop_outer: + pld [lr] + sub r1, r1, #96 + sub r0, r0, #96 + vld1.32 {q0, q1}, [r1]! + vld1.32 {q2, q3}, [r1] + sub lr, lr, #64 + subs r12, r12, #1 + vst1.32 {q0, q1}, [r0]! + vst1.32 {q2, q3}, [r0] + it ne + bne .Lneon_b2f_copy_64_loop_outer + mov r12, #PLDOFFS +.Lneon_b2f_copy_64_loop_nopld: + sub r1, r1, #96 + sub r0, r0, #96 + vld1.32 {q8, q9}, [r1]! + vld1.32 {q10, q11}, [r1] + subs r12, r12, #1 + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0] + it ne + bne .Lneon_b2f_copy_64_loop_nopld + ands r2, r2, #0x3f + it eq + beq .Lneon_memmove_done + sub r1, r1, #32 + sub r0, r0, #32 + cmp r2, #32 + it lt + blt .Lneon_b2f_copy_8 +.Lneon_b2f_copy_32: + sub r1, r1, #32 + sub r0, r0, #32 + vld1.32 {q0, q1}, [r1] + vst1.32 {q0, q1}, [r0] + ands r2, r2, #0x1f + it eq + beq .Lneon_memmove_done +.Lneon_b2f_copy_8: + movs r12, r2, lsr #0x3 + it eq + beq .Lneon_b2f_copy_1 +.Lneon_b2f_copy_8_loop: + sub r1, r1, #8 + sub r0, r0, #8 + vld1.32 {d0}, [r1] + subs r12, r12, #1 + vst1.32 {d0}, [r0] + it ne + bne .Lneon_b2f_copy_8_loop + ands r2, r2, #0x7 + beq .Lneon_memmove_done +.Lneon_b2f_copy_1: + movs r12, r2, lsl #29 + itttt mi + submi r1, r1, #4 + submi r0, r0, #4 + ldrmi r3, [r1] + strmi r3, [r0] + movs r2, r2, lsl #31 + itttt cs + subcs r1, r1, #2 + subcs r0, r0, #2 + ldrhcs r3, [r1] + strhcs r3, [r0] + itttt mi + submi r1, r1, #1 + submi r0, r0, #1 + ldrbmi r12, [r1] + strbmi r12, [r0] +.Lneon_memmove_done: + pop {r0, pc} +.Lneon_b2f_smallcopy_loop: + // 4 bytes or less + add r1, r1, r2 + add r0, r0, r2 + movs r12, r2, lsl #29 + itttt mi + submi r1, r1, #4 + submi r0, r0, #4 + ldrmi r3, [r1] + strmi r3, [r0] + movs r2, r2, lsl #31 + itttt cs + subcs r1, r1, #2 + subcs r0, r0, #2 + ldrhcs r3, [r1] + strhcs r3, [r0] + itttt mi + submi r1, r1, #1 + submi r0, r0, #1 + ldrbmi r12, [r1] + strbmi r12, [r0] + bx lr +// .cfi_endproc +END(memmove) + diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S index a4fbe17..ae05965 100644 --- a/libc/arch-arm/krait/bionic/memset.S +++ b/libc/arch-arm/krait/bionic/memset.S @@ -69,10 +69,7 @@ END(bzero) /* memset() returns its first argument. */ ENTRY(memset) - stmfd sp!, {r0} - .cfi_def_cfa_offset 4 - .cfi_rel_offset r0, 0 - + mov r3, r0 vdup.8 q0, r1 /* make sure we have at least 32 bytes to write */ @@ -82,7 +79,7 @@ ENTRY(memset) 1: /* The main loop writes 32 bytes at a time */ subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! + vst1.8 {d0 - d3}, [r3]! bhs 1b 2: /* less than 32 left */ @@ -91,18 +88,17 @@ ENTRY(memset) beq 3f // writes 16 bytes, 128-bits aligned - vst1.8 {d0, d1}, [r0]! + vst1.8 {d0, d1}, [r3]! 3: /* write up to 15-bytes (count in r2) */ movs ip, r2, lsl #29 bcc 1f - vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r3]! 1: bge 2f - vst1.32 {d0[0]}, [r0]! + vst1.32 {d0[0]}, [r3]! 2: movs ip, r2, lsl #31 - strbmi r1, [r0], #1 - strbcs r1, [r0], #1 - strbcs r1, [r0], #1 - ldmfd sp!, {r0} + strbmi r1, [r3], #1 + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 bx lr END(memset) diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk index 88b4d66..5f5b414 100644 --- a/libc/arch-arm/krait/krait.mk +++ b/libc/arch-arm/krait/krait.mk @@ -1,9 +1,19 @@ libc_bionic_src_files_arm += \ - arch-arm/krait/bionic/memcpy.S \ arch-arm/krait/bionic/memset.S \ arch-arm/krait/bionic/strcmp.S \ arch-arm/krait/bionic/__strcat_chk.S \ arch-arm/krait/bionic/__strcpy_chk.S \ + arch-arm/krait/bionic/memmove.S + +#For some targets we don't need this optimization. +#Corresponding flag is defined in device specific folder. +ifeq ($(TARGET_CPU_MEMCPY_BASE_OPT_DISABLE),true) +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S +else +libc_bionic_src_files_arm += \ + arch-arm/krait/bionic/memcpy.S +endif # Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove libc_bionic_src_files_arm += \ @@ -13,7 +23,7 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a15/bionic/strlen.S \ libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ arch-arm/generic/bionic/memcmp.S \ -libc_bionic_src_files_arm += \ - arch-arm/denver/bionic/memmove.S \ + diff --git a/libc/arch-arm/scorpion/scorpion.mk b/libc/arch-arm/scorpion/scorpion.mk new file mode 100644 index 0000000..ce18a7e --- /dev/null +++ b/libc/arch-arm/scorpion/scorpion.mk @@ -0,0 +1,18 @@ +# Use krait versions of memset/strcmp/memmove +libc_bionic_src_files_arm += \ + arch-arm/krait/bionic/memset.S \ + arch-arm/krait/bionic/strcmp.S \ + arch-arm/krait/bionic/memmove.S + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memchr.S \ + arch-arm/generic/bionic/memcmp.S |