diff options
Diffstat (limited to 'libc/arch-arm')
35 files changed, 1250 insertions, 968 deletions
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk index 70cc8eb..cca4ed0 100644 --- a/libc/arch-arm/arm.mk +++ b/libc/arch-arm/arm.mk @@ -1,22 +1,35 @@ -# arm specific configs +# 32-bit arm. -# These are used by the 32-bit targets, but not the 64-bit ones. -libc_common_src_files_arm := \ +# +# Various kinds of LP32 cruft. +# + +libc_bionic_src_files_arm += \ + bionic/mmap.cpp \ + +libc_common_src_files_arm += \ bionic/legacy_32_bit_support.cpp \ bionic/ndk_cruft.cpp \ bionic/time64.c \ + +libc_netbsd_src_files_arm += \ + upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \ + +libc_openbsd_src_files_arm += \ upstream-openbsd/lib/libc/stdio/putw.c \ -# These are shared by all the 32-bit targets, but not the 64-bit ones. -libc_bionic_src_files_arm := \ - bionic/mmap.cpp +# +# Default implementations of functions that are commonly optimized. +# -libc_common_src_files_arm += \ +libc_bionic_src_files_arm += \ bionic/memchr.c \ bionic/memrchr.c \ bionic/strchr.cpp \ bionic/strnlen.c \ bionic/strrchr.cpp \ + +libc_freebsd_src_files_arm += \ upstream-freebsd/lib/libc/string/wcscat.c \ upstream-freebsd/lib/libc/string/wcschr.c \ upstream-freebsd/lib/libc/string/wcscmp.c \ @@ -25,8 +38,9 @@ libc_common_src_files_arm += \ upstream-freebsd/lib/libc/string/wcsrchr.c \ upstream-freebsd/lib/libc/string/wmemcmp.c \ upstream-freebsd/lib/libc/string/wmemmove.c \ + +libc_openbsd_src_files_arm += \ upstream-openbsd/lib/libc/string/bcopy.c \ - upstream-openbsd/lib/libc/string/stpcpy.c \ upstream-openbsd/lib/libc/string/stpncpy.c \ upstream-openbsd/lib/libc/string/strlcat.c \ upstream-openbsd/lib/libc/string/strlcpy.c \ @@ -34,20 +48,10 @@ libc_common_src_files_arm += \ upstream-openbsd/lib/libc/string/strncmp.c \ upstream-openbsd/lib/libc/string/strncpy.c \ -# The C++ fortify function implementations for which there is an -# arm assembler version. # -# Fortify implementations of libc functions. -# libc_common_src_files_arm += -# bionic/__memcpy_chk.cpp \ -# bionic/__memset_chk.cpp \ -# bionic/__strcpy_chk.cpp \ -# bionic/__strcat_chk.cpp \ - -libc_common_cflags_arm := -DSOFTFLOAT +# Inherently architecture-specific code. +# -########################################## -### CPU specific source files libc_bionic_src_files_arm += \ arch-arm/bionic/abort_arm.S \ arch-arm/bionic/atomics_arm.c \ @@ -55,6 +59,7 @@ libc_bionic_src_files_arm += \ arch-arm/bionic/_exit_with_stack_teardown.S \ arch-arm/bionic/libgcc_compat.c \ arch-arm/bionic/memcmp.S \ + arch-arm/bionic/__restore.S \ arch-arm/bionic/_setjmp.S \ arch-arm/bionic/setjmp.S \ arch-arm/bionic/sigsetjmp.S \ @@ -63,9 +68,6 @@ libc_bionic_src_files_arm += \ libc_arch_static_src_files_arm := arch-arm/bionic/exidx_static.c libc_arch_dynamic_src_files_arm := arch-arm/bionic/exidx_dynamic.c -libc_netbsd_src_files_arm := \ - upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \ - ## CPU variant specific source files ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),) $(warning TARGET_$(my_2nd_arch_prefix)ARCH is arm, but TARGET_$(my_2nd_arch_prefix)CPU_VARIANT is not defined) diff --git a/libc/arch-arm/bionic/__restore.S b/libc/arch-arm/bionic/__restore.S new file mode 100644 index 0000000..9898125 --- /dev/null +++ b/libc/arch-arm/bionic/__restore.S @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <private/bionic_asm.h> + +// gdb is smart enough to unwind through signal frames with just the regular +// CFI information but libgcc and libunwind both need extra help. We do this +// by using .fnstart/.fnend and inserting a nop before both __restore and +// __restore_rt (but covered by the .fnstart/.fnend) so that although they're +// not inside the functions from objdump's point of view, an unwinder that +// blindly looks at the previous instruction (but is then smart enough to check +// the DWARF information to find out where it landed) gets the right answer. + +// We need to place .fnstart ourselves (but we may as well keep the free .fnend). +#undef __bionic_asm_custom_entry +#define __bionic_asm_custom_entry(f) + + .fnstart + .save {r0-r15} + .pad #32 + nop +ENTRY_PRIVATE(__restore) + // This function must have exactly this instruction sequence. + mov r7, #__NR_sigreturn + swi #0 +END(__restore) + + .fnstart + .save {r0-r15} + .pad #160 + nop +ENTRY_PRIVATE(__restore_rt) + // This function must have exactly this instruction sequence. + mov r7, #__NR_rt_sigreturn + swi #0 +END(__restore_rt) diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S index 36da2d9..a2e9c22 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S @@ -40,12 +40,10 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} - .save {r4, r5} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 @@ -195,9 +193,6 @@ END(__strcat_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcat_chk_failed) - .save {r0, lr} - .save {r4, r5} - .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S index c3e3e14..db76686 100644 --- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S +++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S @@ -39,7 +39,6 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -161,7 +160,6 @@ END(__strcpy_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcpy_chk_failed) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S index da4f3dd..410b663 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S @@ -72,7 +72,6 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #64] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -85,7 +84,6 @@ END(memcpy) ENTRY_PRIVATE(__memcpy_chk_fail) // Preserve lr for backtrace. push {lr} - .save {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S index 6ba4931..2a73852 100644 --- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S @@ -54,7 +54,6 @@ */ ENTRY_PRIVATE(MEMCPY_BASE) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -172,7 +171,6 @@ ENTRY_PRIVATE(MEMCPY_BASE) END(MEMCPY_BASE) ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -181,17 +179,14 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // i.e., not keeping the stack looking like users expect // (highest numbered register at highest address). strd r4, r5, [sp, #-8]! - .save {r4, r5} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 strd r6, r7, [sp, #-8]! - .save {r6, r7} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r6, 0 .cfi_rel_offset r7, 0 strd r8, r9, [sp, #-8]! - .save {r8, r9} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r8, 0 .cfi_rel_offset r9, 4 diff --git a/libc/arch-arm/cortex-a15/bionic/memset.S b/libc/arch-arm/cortex-a15/bionic/memset.S index 12c68d6..e4a1ec8 100644 --- a/libc/arch-arm/cortex-a15/bionic/memset.S +++ b/libc/arch-arm/cortex-a15/bionic/memset.S @@ -44,7 +44,6 @@ ENTRY(__memset_chk) bls .L_done // Preserve lr for backtrace. - .save {lr} push {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 @@ -68,7 +67,6 @@ ENTRY(bzero) END(bzero) ENTRY(memset) - .save {r0} stmfd sp!, {r0} .cfi_def_cfa_offset 4 .cfi_rel_offset r0, 0 diff --git a/libc/arch-arm/cortex-a15/bionic/stpcpy.S b/libc/arch-arm/cortex-a15/bionic/stpcpy.S new file mode 100644 index 0000000..740523b --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/stpcpy.S @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define STPCPY +#include "string_copy.S" diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S index 12da115..acedf0e 100644 --- a/libc/arch-arm/cortex-a15/bionic/strcmp.S +++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S @@ -168,7 +168,6 @@ ENTRY(strcmp) bne .L_do_align /* Fast path. */ - .save {r4-r7} init .L_doubleword_aligned: diff --git a/libc/arch-arm/cortex-a15/bionic/strcpy.S b/libc/arch-arm/cortex-a15/bionic/strcpy.S index cb878c4..951face 100644 --- a/libc/arch-arm/cortex-a15/bionic/strcpy.S +++ b/libc/arch-arm/cortex-a15/bionic/strcpy.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2014 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,427 +25,6 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <private/bionic_asm.h> - - .syntax unified - - .thumb - .thumb_func - - .macro m_push - push {r0, r4, r5, lr} - .endm // m_push - - .macro m_pop - pop {r0, r4, r5, pc} - .endm // m_pop - - .macro m_copy_byte reg, cmd, label - ldrb \reg, [r1], #1 - strb \reg, [r0], #1 - \cmd \reg, \label - .endm // m_copy_byte - -ENTRY(strcpy) - // For short copies, hard-code checking the first 8 bytes since this - // new code doesn't win until after about 8 bytes. - m_push - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: - m_pop - -strcpy_continue: - pld [r1, #0] - ands r3, r0, #7 - beq strcpy_check_src_align - - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq strcpy_align_to_32 - - ldrb r2, [r1], #1 - strb r2, [r0], #1 - cbz r2, strcpy_complete - -strcpy_align_to_32: - bcc strcpy_align_to_64 - - ldrb r2, [r1], #1 - strb r2, [r0], #1 - cbz r2, strcpy_complete - ldrb r2, [r1], #1 - strb r2, [r0], #1 - cbz r2, strcpy_complete - -strcpy_align_to_64: - tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - str r2, [r0], #4 - -strcpy_check_src_align: - // At this point dst is aligned to a double word, check if src - // is also aligned to a double word. - ands r3, r1, #7 - bne strcpy_unaligned_copy - - .p2align 2 -strcpy_mainloop: - ldrd r2, r3, [r1], #8 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_mainloop - -strcpy_complete: - m_pop - -strcpy_zero_in_first_register: - lsls lr, ip, #17 - bne strcpy_copy1byte - bcs strcpy_copy2bytes - lsls ip, ip, #1 - bne strcpy_copy3bytes - -strcpy_copy4bytes: - // Copy 4 bytes to the destiniation. - str r2, [r0] - m_pop - -strcpy_copy1byte: - strb r2, [r0] - m_pop - -strcpy_copy2bytes: - strh r2, [r0] - m_pop - -strcpy_copy3bytes: - strh r2, [r0], #2 - lsr r2, #16 - strb r2, [r0] - m_pop - -strcpy_zero_in_second_register: - lsls lr, ip, #17 - bne strcpy_copy5bytes - bcs strcpy_copy6bytes - lsls ip, ip, #1 - bne strcpy_copy7bytes - - // Copy 8 bytes to the destination. - strd r2, r3, [r0] - m_pop - -strcpy_copy5bytes: - str r2, [r0], #4 - strb r3, [r0] - m_pop - -strcpy_copy6bytes: - str r2, [r0], #4 - strh r3, [r0] - m_pop - -strcpy_copy7bytes: - str r2, [r0], #4 - strh r3, [r0], #2 - lsr r3, #16 - strb r3, [r0] - m_pop - -strcpy_unaligned_copy: - // Dst is aligned to a double word, while src is at an unknown alignment. - // There are 7 different versions of the unaligned copy code - // to prevent overreading the src. The mainloop of every single version - // will store 64 bits per loop. The difference is how much of src can - // be read without potentially crossing a page boundary. - tbb [pc, r3] -strcpy_unaligned_branchtable: - .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) - - .p2align 2 - // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes - ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes - ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes - - ldr r3, [r1], #4 - pld [r1, #64] - - lsrs ip, r3, #24 - strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign7 - -strcpy_unalign7_copy5bytes: - str r2, [r0], #4 - strb r3, [r0] -strcpy_unalign_return: - m_pop - -strcpy_unalign7_copy6bytes: - str r2, [r0], #4 - strb r3, [r0], #1 - strb r4, [r0], #1 - m_pop - -strcpy_unalign7_copy7bytes: - str r2, [r0], #4 - strb r3, [r0], #1 - strb r4, [r0], #1 - strb r5, [r0], #1 - m_pop - - .p2align 2 - // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes - ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes - - ldr r3, [r1], #4 - pld [r1, #64] - - tst r3, #0xff0000 - beq strcpy_copy7bytes - lsrs ip, r3, #24 - strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign6 - - .p2align 2 - // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes - - ldr r3, [r1], #4 - - pld [r1, #64] - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_unalign5 - -strcpy_unalign_copy5bytes: - str r2, [r0], #4 - strb r4, [r0] - m_pop - -strcpy_unalign_copy6bytes: - str r2, [r0], #4 - strb r4, [r0], #1 - strb r5, [r0] - m_pop - - .p2align 2 - // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldr r3, [r1], #4 - pld [r1, #64] - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_unalign4 - - .p2align 2 - // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: - ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte - ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes - ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - - pld [r1, #64] - - lsrs lr, r2, #24 - beq strcpy_copy4bytes - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_unalign3 - -strcpy_unalign3_copy1byte: - strb r2, [r0] - m_pop - -strcpy_unalign3_copy2bytes: - strb r2, [r0], #1 - strb r3, [r0] - m_pop - -strcpy_unalign3_copy3bytes: - strb r2, [r0], #1 - strb r3, [r0], #1 - strb r4, [r0] - m_pop - - .p2align 2 - // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: - ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte - ldrb r4, [r1, #1] - cbz r4, strcpy_unalign_copy2bytes - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - pld [r1, #64] - - tst r2, #0xff0000 - beq strcpy_copy3bytes - lsrs ip, r2, #24 - beq strcpy_copy4bytes - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_unalign2 - - .p2align 2 - // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: - ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - strd r2, r3, [r0], #8 - b strcpy_unalign1 - -strcpy_unalign_copy1byte: - strb r2, [r0] - m_pop -strcpy_unalign_copy2bytes: - strb r2, [r0], #1 - strb r4, [r0] - m_pop -END(strcpy) +#define STRCPY +#include "string_copy.S" diff --git a/libc/arch-arm/cortex-a15/bionic/string_copy.S b/libc/arch-arm/cortex-a15/bionic/string_copy.S new file mode 100644 index 0000000..20f0e91 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/string_copy.S @@ -0,0 +1,513 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(STPCPY) && !defined(STRCPY) +#error "Either STPCPY or STRCPY must be defined." +#endif + +#include <private/bionic_asm.h> + + .syntax unified + + .thumb + .thumb_func + +#if defined(STPCPY) + .macro m_push + push {r4, r5, lr} + .cfi_def_cfa_offset 12 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + .cfi_rel_offset lr, 8 + .endm // m_push +#else + .macro m_push + push {r0, r4, r5, lr} + .cfi_def_cfa_offset 16 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r4, 4 + .cfi_rel_offset r5, 8 + .cfi_rel_offset lr, 12 + .endm // m_push +#endif + +#if defined(STPCPY) + .macro m_pop + pop {r4, r5, pc} + .endm // m_pop +#else + .macro m_pop + pop {r0, r4, r5, pc} + .endm // m_pop +#endif + + .macro m_copy_byte reg, cmd, label + ldrb \reg, [r1], #1 + strb \reg, [r0], #1 + \cmd \reg, \label + .endm // m_copy_byte + +#if defined(STPCPY) +ENTRY(stpcpy) +#else +ENTRY(strcpy) +#endif + // For short copies, hard-code checking the first 8 bytes since this + // new code doesn't win until after about 8 bytes. + m_push + m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue + +.Lstringcopy_finish: +#if defined(STPCPY) + sub r0, r0, #1 +#endif + m_pop + +.Lstringcopy_continue: + pld [r1, #0] + ands r3, r0, #7 + beq .Lstringcopy_check_src_align + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .Lstringcopy_align_to_32 + + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + +.Lstringcopy_align_to_32: + bcc .Lstringcopy_align_to_64 + + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + +.Lstringcopy_align_to_64: + tst r3, #4 + beq .Lstringcopy_check_src_align + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + str r2, [r0], #4 + +.Lstringcopy_check_src_align: + // At this point dst is aligned to a double word, check if src + // is also aligned to a double word. + ands r3, r1, #7 + bne .Lstringcopy_unaligned_copy + + .p2align 2 +.Lstringcopy_mainloop: + ldrd r2, r3, [r1], #8 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_mainloop + +.Lstringcopy_complete: +#if defined(STPCPY) + sub r0, r0, #1 +#endif + m_pop + +.Lstringcopy_zero_in_first_register: + lsls lr, ip, #17 + bne .Lstringcopy_copy1byte + bcs .Lstringcopy_copy2bytes + lsls ip, ip, #1 + bne .Lstringcopy_copy3bytes + +.Lstringcopy_copy4bytes: + // Copy 4 bytes to the destiniation. +#if defined(STPCPY) + str r2, [r0], #3 +#else + str r2, [r0] +#endif + m_pop + +.Lstringcopy_copy1byte: + strb r2, [r0] + m_pop + +.Lstringcopy_copy2bytes: +#if defined(STPCPY) + strh r2, [r0], #1 +#else + strh r2, [r0] +#endif + m_pop + +.Lstringcopy_copy3bytes: + strh r2, [r0], #2 + lsr r2, #16 + strb r2, [r0] + m_pop + +.Lstringcopy_zero_in_second_register: + lsls lr, ip, #17 + bne .Lstringcopy_copy5bytes + bcs .Lstringcopy_copy6bytes + lsls ip, ip, #1 + bne .Lstringcopy_copy7bytes + + // Copy 8 bytes to the destination. + strd r2, r3, [r0] +#if defined(STPCPY) + add r0, r0, #7 +#endif + m_pop + +.Lstringcopy_copy5bytes: + str r2, [r0], #4 + strb r3, [r0] + m_pop + +.Lstringcopy_copy6bytes: + str r2, [r0], #4 +#if defined(STPCPY) + strh r3, [r0], #1 +#else + strh r3, [r0] +#endif + m_pop + +.Lstringcopy_copy7bytes: + str r2, [r0], #4 + strh r3, [r0], #2 + lsr r3, #16 + strb r3, [r0] + m_pop + +.Lstringcopy_unaligned_copy: + // Dst is aligned to a double word, while src is at an unknown alignment. + // There are 7 different versions of the unaligned copy code + // to prevent overreading the src. The mainloop of every single version + // will store 64 bits per loop. The difference is how much of src can + // be read without potentially crossing a page boundary. + tbb [pc, r3] +.Lstringcopy_unaligned_branchtable: + .byte 0 + .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2) + + .p2align 2 + // Can read 7 bytes before possibly crossing a page. +.Lstringcopy_unalign7: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r3, [r1] + cbz r3, .Lstringcopy_unalign7_copy5bytes + ldrb r4, [r1, #1] + cbz r4, .Lstringcopy_unalign7_copy6bytes + ldrb r5, [r1, #2] + cbz r5, .Lstringcopy_unalign7_copy7bytes + + ldr r3, [r1], #4 + pld [r1, #64] + + lsrs ip, r3, #24 + strd r2, r3, [r0], #8 +#if defined(STPCPY) + beq .Lstringcopy_finish +#else + beq .Lstringcopy_unalign_return +#endif + b .Lstringcopy_unalign7 + +.Lstringcopy_unalign7_copy5bytes: + str r2, [r0], #4 + strb r3, [r0] +.Lstringcopy_unalign_return: + m_pop + +.Lstringcopy_unalign7_copy6bytes: + str r2, [r0], #4 + strb r3, [r0], #1 + strb r4, [r0] + m_pop + +.Lstringcopy_unalign7_copy7bytes: + str r2, [r0], #4 + strb r3, [r0], #1 + strb r4, [r0], #1 + strb r5, [r0] + m_pop + + .p2align 2 + // Can read 6 bytes before possibly crossing a page. +.Lstringcopy_unalign6: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r4, [r1] + cbz r4, .Lstringcopy_unalign_copy5bytes + ldrb r5, [r1, #1] + cbz r5, .Lstringcopy_unalign_copy6bytes + + ldr r3, [r1], #4 + pld [r1, #64] + + tst r3, #0xff0000 + beq .Lstringcopy_copy7bytes + lsrs ip, r3, #24 + strd r2, r3, [r0], #8 +#if defined(STPCPY) + beq .Lstringcopy_finish +#else + beq .Lstringcopy_unalign_return +#endif + b .Lstringcopy_unalign6 + + .p2align 2 + // Can read 5 bytes before possibly crossing a page. +.Lstringcopy_unalign5: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r4, [r1] + cbz r4, .Lstringcopy_unalign_copy5bytes + + ldr r3, [r1], #4 + + pld [r1, #64] + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_unalign5 + +.Lstringcopy_unalign_copy5bytes: + str r2, [r0], #4 + strb r4, [r0] + m_pop + +.Lstringcopy_unalign_copy6bytes: + str r2, [r0], #4 + strb r4, [r0], #1 + strb r5, [r0] + m_pop + + .p2align 2 + // Can read 4 bytes before possibly crossing a page. +.Lstringcopy_unalign4: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldr r3, [r1], #4 + pld [r1, #64] + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_unalign4 + + .p2align 2 + // Can read 3 bytes before possibly crossing a page. +.Lstringcopy_unalign3: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign3_copy1byte + ldrb r3, [r1, #1] + cbz r3, .Lstringcopy_unalign3_copy2bytes + ldrb r4, [r1, #2] + cbz r4, .Lstringcopy_unalign3_copy3bytes + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + + pld [r1, #64] + + lsrs lr, r2, #24 + beq .Lstringcopy_copy4bytes + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_unalign3 + +.Lstringcopy_unalign3_copy1byte: + strb r2, [r0] + m_pop + +.Lstringcopy_unalign3_copy2bytes: + strb r2, [r0], #1 + strb r3, [r0] + m_pop + +.Lstringcopy_unalign3_copy3bytes: + strb r2, [r0], #1 + strb r3, [r0], #1 + strb r4, [r0] + m_pop + + .p2align 2 + // Can read 2 bytes before possibly crossing a page. +.Lstringcopy_unalign2: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign_copy1byte + ldrb r4, [r1, #1] + cbz r4, .Lstringcopy_unalign_copy2bytes + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + pld [r1, #64] + + tst r2, #0xff0000 + beq .Lstringcopy_copy3bytes + lsrs ip, r2, #24 + beq .Lstringcopy_copy4bytes + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_unalign2 + + .p2align 2 + // Can read 1 byte before possibly crossing a page. +.Lstringcopy_unalign1: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign_copy1byte + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + strd r2, r3, [r0], #8 + b .Lstringcopy_unalign1 + +.Lstringcopy_unalign_copy1byte: + strb r2, [r0] + m_pop + +.Lstringcopy_unalign_copy2bytes: + strb r2, [r0], #1 + strb r4, [r0] + m_pop +#if defined(STPCPY) +END(stpcpy) +#else +END(strcpy) +#endif diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk index 552811e..f1abe32 100644 --- a/libc/arch-arm/cortex-a15/cortex-a15.mk +++ b/libc/arch-arm/cortex-a15/cortex-a15.mk @@ -1,10 +1,11 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a15/bionic/memcpy.S \ arch-arm/cortex-a15/bionic/memset.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ arch-arm/cortex-a15/bionic/strcmp.S \ arch-arm/cortex-a15/bionic/strcpy.S \ - arch-arm/cortex-a15/bionic/strlen.S \ - arch-arm/cortex-a15/bionic/__strcat_chk.S \ arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ bionic/memmove.c \ diff --git a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S index 651aefc..45517f1 100644 --- a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S +++ b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S @@ -40,12 +40,10 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} - .save {r4, r5} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 @@ -199,8 +197,6 @@ END(__strcat_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcat_chk_fail) - .save {r0, lr} - .save {r4, r5} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S index 2447780..67eca08 100644 --- a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S +++ b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S @@ -39,7 +39,6 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -165,7 +164,6 @@ END(__strcpy_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcpy_chk_fail) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy.S b/libc/arch-arm/cortex-a9/bionic/memcpy.S index 8dcd937..db3e26f 100644 --- a/libc/arch-arm/cortex-a9/bionic/memcpy.S +++ b/libc/arch-arm/cortex-a9/bionic/memcpy.S @@ -50,7 +50,6 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #0] stmfd sp!, {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -64,7 +63,6 @@ END(memcpy) ENTRY_PRIVATE(__memcpy_chk_fail) // Preserve lr for backtrace. push {lr} - .save {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S index c385657..5e81305 100644 --- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S +++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S @@ -33,7 +33,6 @@ */ ENTRY_PRIVATE(MEMCPY_BASE) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -139,14 +138,12 @@ ENTRY_PRIVATE(MEMCPY_BASE) END(MEMCPY_BASE) ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 /* Simple arm-only copy loop to handle aligned copy operations */ stmfd sp!, {r4-r8} - .save {r4-r8} .cfi_adjust_cfa_offset 20 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S index a5057eb..299f5a2 100644 --- a/libc/arch-arm/cortex-a9/bionic/memset.S +++ b/libc/arch-arm/cortex-a9/bionic/memset.S @@ -42,7 +42,6 @@ ENTRY(__memset_chk) // Preserve lr for backtrace. push {lr} - .save {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 @@ -72,7 +71,6 @@ ENTRY(memset) bhi __memset_large_copy stmfd sp!, {r0} - .save {r0} .cfi_def_cfa_offset 4 .cfi_rel_offset r0, 0 @@ -114,7 +112,6 @@ ENTRY_PRIVATE(__memset_large_copy) * offset = (4-(src&3))&3 = -src & 3 */ stmfd sp!, {r0, r4-r7, lr} - .save {r0, r4-r7, lr} .cfi_def_cfa_offset 24 .cfi_rel_offset r0, 0 .cfi_rel_offset r4, 4 diff --git a/libc/arch-arm/cortex-a9/bionic/stpcpy.S b/libc/arch-arm/cortex-a9/bionic/stpcpy.S new file mode 100644 index 0000000..740523b --- /dev/null +++ b/libc/arch-arm/cortex-a9/bionic/stpcpy.S @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define STPCPY +#include "string_copy.S" diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S index 2411c65..4ff26c0 100644 --- a/libc/arch-arm/cortex-a9/bionic/strcmp.S +++ b/libc/arch-arm/cortex-a9/bionic/strcmp.S @@ -168,7 +168,6 @@ ENTRY(strcmp) bne .L_do_align /* Fast path. */ - .save {r4-r7} init .L_doubleword_aligned: diff --git a/libc/arch-arm/cortex-a9/bionic/strcpy.S b/libc/arch-arm/cortex-a9/bionic/strcpy.S index 9e9610b..951face 100644 --- a/libc/arch-arm/cortex-a9/bionic/strcpy.S +++ b/libc/arch-arm/cortex-a9/bionic/strcpy.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2014 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,432 +25,6 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <private/bionic_asm.h> - - .syntax unified - - .thumb - .thumb_func - - .macro m_push - push {r0, r4, r5, lr} - .endm // m_push - - .macro m_ret inst - \inst {r0, r4, r5, pc} - .endm // m_ret - - .macro m_copy_byte reg, cmd, label - ldrb \reg, [r1], #1 - strb \reg, [r0], #1 - \cmd \reg, \label - .endm // m_copy_byte - -ENTRY(strcpy) - // Unroll the first 8 bytes that will be copied. - m_push - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: - m_ret inst=pop - -strcpy_continue: - pld [r1, #0] - ands r3, r0, #7 - bne strcpy_align_dst - -strcpy_check_src_align: - // At this point dst is aligned to a double word, check if src - // is also aligned to a double word. - ands r3, r1, #7 - bne strcpy_unaligned_copy - - .p2align 2 -strcpy_mainloop: - ldmia r1!, {r2, r3} - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_mainloop - -strcpy_zero_in_first_register: - lsls lr, ip, #17 - itt ne - strbne r2, [r0] - m_ret inst=popne - itt cs - strhcs r2, [r0] - m_ret inst=popcs - lsls ip, ip, #1 - itt eq - streq r2, [r0] - m_ret inst=popeq - strh r2, [r0], #2 - lsr r3, r2, #16 - strb r3, [r0] - m_ret inst=pop - -strcpy_zero_in_second_register: - lsls lr, ip, #17 - ittt ne - stmiane r0!, {r2} - strbne r3, [r0] - m_ret inst=popne - ittt cs - strcs r2, [r0], #4 - strhcs r3, [r0] - m_ret inst=popcs - lsls ip, ip, #1 - itt eq - stmiaeq r0, {r2, r3} - m_ret inst=popeq - stmia r0!, {r2} - strh r3, [r0], #2 - lsr r4, r3, #16 - strb r4, [r0] - m_ret inst=pop - -strcpy_align_dst: - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq strcpy_align_to_32 - - ldrb r2, [r1], #1 - strb r2, [r0], #1 - cbz r2, strcpy_complete - -strcpy_align_to_32: - bcc strcpy_align_to_64 - - ldrb r4, [r1], #1 - strb r4, [r0], #1 - cmp r4, #0 - it eq - m_ret inst=popeq - ldrb r5, [r1], #1 - strb r5, [r0], #1 - cmp r5, #0 - it eq - m_ret inst=popeq - -strcpy_align_to_64: - tst r3, #4 - beq strcpy_check_src_align - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - stmia r0!, {r2} - b strcpy_check_src_align - -strcpy_complete: - m_ret inst=pop - -strcpy_unaligned_copy: - // Dst is aligned to a double word, while src is at an unknown alignment. - // There are 7 different versions of the unaligned copy code - // to prevent overreading the src. The mainloop of every single version - // will store 64 bits per loop. The difference is how much of src can - // be read without potentially crossing a page boundary. - tbb [pc, r3] -strcpy_unaligned_branchtable: - .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) - - .p2align 2 - // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes - ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes - ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes - - ldr r3, [r1], #4 - pld [r1, #64] - - lsrs ip, r3, #24 - stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign7 - -strcpy_unalign7_copy5bytes: - stmia r0!, {r2} - strb r3, [r0] -strcpy_unalign_return: - m_ret inst=pop - -strcpy_unalign7_copy6bytes: - stmia r0!, {r2} - strb r3, [r0], #1 - strb r4, [r0], #1 - m_ret inst=pop - -strcpy_unalign7_copy7bytes: - stmia r0!, {r2} - strb r3, [r0], #1 - strb r4, [r0], #1 - strb r5, [r0], #1 - m_ret inst=pop - - .p2align 2 - // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes - ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes - - ldr r3, [r1], #4 - pld [r1, #64] - - tst r3, #0xff0000 - beq strcpy_unalign6_copy7bytes - lsrs ip, r3, #24 - stmia r0!, {r2, r3} - beq strcpy_unalign_return - b strcpy_unalign6 - -strcpy_unalign6_copy7bytes: - stmia r0!, {r2} - strh r3, [r0], #2 - lsr r3, #16 - strb r3, [r0] - m_ret inst=pop - - .p2align 2 - // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: - ldr r2, [r1], #4 - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes - - ldr r3, [r1], #4 - - pld [r1, #64] - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_unalign5 - -strcpy_unalign_copy5bytes: - stmia r0!, {r2} - strb r4, [r0] - m_ret inst=pop - -strcpy_unalign_copy6bytes: - stmia r0!, {r2} - strb r4, [r0], #1 - strb r5, [r0] - m_ret inst=pop - - .p2align 2 - // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: - ldmia r1!, {r2} - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - ldmia r1!, {r3} - pld [r1, #64] - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_unalign4 - - .p2align 2 - // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: - ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte - ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes - ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - - pld [r1, #64] - - lsrs lr, r2, #24 - beq strcpy_unalign_copy4bytes - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_unalign3 - -strcpy_unalign3_copy1byte: - strb r2, [r0] - m_ret inst=pop - -strcpy_unalign3_copy2bytes: - strb r2, [r0], #1 - strb r3, [r0] - m_ret inst=pop - -strcpy_unalign3_copy3bytes: - strb r2, [r0], #1 - strb r3, [r0], #1 - strb r4, [r0] - m_ret inst=pop - - .p2align 2 - // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: - ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte - ldrb r3, [r1, #1] - cbz r3, strcpy_unalign_copy2bytes - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - pld [r1, #64] - - tst r2, #0xff0000 - beq strcpy_unalign_copy3bytes - lsrs ip, r2, #24 - beq strcpy_unalign_copy4bytes - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_unalign2 - - .p2align 2 - // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: - ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte - - ldr r2, [r1], #4 - ldr r3, [r1], #4 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register - - stmia r0!, {r2, r3} - b strcpy_unalign1 - -strcpy_unalign_copy1byte: - strb r2, [r0] - m_ret inst=pop - -strcpy_unalign_copy2bytes: - strb r2, [r0], #1 - strb r3, [r0] - m_ret inst=pop - -strcpy_unalign_copy3bytes: - strh r2, [r0], #2 - lsr r2, #16 - strb r2, [r0] - m_ret inst=pop -strcpy_unalign_copy4bytes: - stmia r0, {r2} - m_ret inst=pop -END(strcpy) +#define STRCPY +#include "string_copy.S" diff --git a/libc/arch-arm/cortex-a9/bionic/string_copy.S b/libc/arch-arm/cortex-a9/bionic/string_copy.S new file mode 100644 index 0000000..caf5a11 --- /dev/null +++ b/libc/arch-arm/cortex-a9/bionic/string_copy.S @@ -0,0 +1,535 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(STPCPY) && !defined(STRCPY) +#error "Either STPCPY or STRCPY must be defined." +#endif + +#include <private/bionic_asm.h> + + .syntax unified + + .thumb + .thumb_func + +#if defined(STPCPY) + .macro m_push + push {r4, r5, lr} + .cfi_def_cfa_offset 12 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + .cfi_rel_offset lr, 8 + .endm // m_push +#else + .macro m_push + push {r0, r4, r5, lr} + .cfi_def_cfa_offset 16 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r4, 4 + .cfi_rel_offset r5, 8 + .cfi_rel_offset lr, 12 + .endm // m_push +#endif + +#if defined(STPCPY) + .macro m_ret inst + \inst {r4, r5, pc} + .endm // m_ret +#else + .macro m_ret inst + \inst {r0, r4, r5, pc} + .endm // m_ret +#endif + + .macro m_copy_byte reg, cmd, label + ldrb \reg, [r1], #1 + strb \reg, [r0], #1 + \cmd \reg, \label + .endm // m_copy_byte + +#if defined(STPCPY) +ENTRY(stpcpy) +#else +ENTRY(strcpy) +#endif + // Unroll the first 8 bytes that will be copied. + m_push + m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue + +.Lstringcopy_finish: +#if defined(STPCPY) + sub r0, r0, #1 +#endif + m_ret inst=pop + +.Lstringcopy_continue: + pld [r1, #0] + ands r3, r0, #7 + bne .Lstringcopy_align_dst + +.Lstringcopy_check_src_align: + // At this point dst is aligned to a double word, check if src + // is also aligned to a double word. + ands r3, r1, #7 + bne .Lstringcopy_unaligned_copy + + .p2align 2 +.Lstringcopy_mainloop: + ldmia r1!, {r2, r3} + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_mainloop + +.Lstringcopy_zero_in_first_register: + lsls lr, ip, #17 + itt ne + strbne r2, [r0] + m_ret inst=popne + itt cs +#if defined(STPCPY) + strhcs r2, [r0], #1 +#else + strhcs r2, [r0] +#endif + m_ret inst=popcs + lsls ip, ip, #1 + itt eq +#if defined(STPCPY) + streq r2, [r0], #3 +#else + streq r2, [r0] +#endif + m_ret inst=popeq + strh r2, [r0], #2 + lsr r3, r2, #16 + strb r3, [r0] + m_ret inst=pop + +.Lstringcopy_zero_in_second_register: + lsls lr, ip, #17 + ittt ne + stmiane r0!, {r2} + strbne r3, [r0] + m_ret inst=popne + ittt cs + strcs r2, [r0], #4 +#if defined(STPCPY) + strhcs r3, [r0], #1 +#else + strhcs r3, [r0] +#endif + m_ret inst=popcs + lsls ip, ip, #1 +#if defined(STPCPY) + ittt eq +#else + itt eq +#endif + stmiaeq r0, {r2, r3} +#if defined(STPCPY) + addeq r0, r0, #7 +#endif + m_ret inst=popeq + stmia r0!, {r2} + strh r3, [r0], #2 + lsr r4, r3, #16 + strb r4, [r0] + m_ret inst=pop + +.Lstringcopy_align_dst: + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .Lstringcopy_align_to_32 + + ldrb r2, [r1], #1 + strb r2, [r0], #1 + cbz r2, .Lstringcopy_complete + +.Lstringcopy_align_to_32: + bcc .Lstringcopy_align_to_64 + + ldrb r4, [r1], #1 + strb r4, [r0], #1 + cmp r4, #0 +#if defined(STPCPY) + itt eq + subeq r0, r0, #1 +#else + it eq +#endif + m_ret inst=popeq + ldrb r5, [r1], #1 + strb r5, [r0], #1 + cmp r5, #0 +#if defined(STPCPY) + itt eq + subeq r0, r0, #1 +#else + it eq +#endif + m_ret inst=popeq + +.Lstringcopy_align_to_64: + tst r3, #4 + beq .Lstringcopy_check_src_align + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + stmia r0!, {r2} + b .Lstringcopy_check_src_align + +.Lstringcopy_complete: +#if defined(STPCPY) + sub r0, r0, #1 +#endif + m_ret inst=pop + +.Lstringcopy_unaligned_copy: + // Dst is aligned to a double word, while src is at an unknown alignment. + // There are 7 different versions of the unaligned copy code + // to prevent overreading the src. The mainloop of every single version + // will store 64 bits per loop. The difference is how much of src can + // be read without potentially crossing a page boundary. + tbb [pc, r3] +.Lstringcopy_unaligned_branchtable: + .byte 0 + .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2) + .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2) + + .p2align 2 + // Can read 7 bytes before possibly crossing a page. +.Lstringcopy_unalign7: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r3, [r1] + cbz r3, .Lstringcopy_unalign7_copy5bytes + ldrb r4, [r1, #1] + cbz r4, .Lstringcopy_unalign7_copy6bytes + ldrb r5, [r1, #2] + cbz r5, .Lstringcopy_unalign7_copy7bytes + + ldr r3, [r1], #4 + pld [r1, #64] + + lsrs ip, r3, #24 + stmia r0!, {r2, r3} +#if defined(STPCPY) + beq .Lstringcopy_finish +#else + beq .Lstringcopy_unalign_return +#endif + b .Lstringcopy_unalign7 + +.Lstringcopy_unalign7_copy5bytes: + stmia r0!, {r2} + strb r3, [r0] +.Lstringcopy_unalign_return: + m_ret inst=pop + +.Lstringcopy_unalign7_copy6bytes: + stmia r0!, {r2} + strb r3, [r0], #1 + strb r4, [r0] + m_ret inst=pop + +.Lstringcopy_unalign7_copy7bytes: + stmia r0!, {r2} + strb r3, [r0], #1 + strb r4, [r0], #1 + strb r5, [r0] + m_ret inst=pop + + .p2align 2 + // Can read 6 bytes before possibly crossing a page. +.Lstringcopy_unalign6: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r4, [r1] + cbz r4, .Lstringcopy_unalign_copy5bytes + ldrb r5, [r1, #1] + cbz r5, .Lstringcopy_unalign_copy6bytes + + ldr r3, [r1], #4 + pld [r1, #64] + + tst r3, #0xff0000 + beq .Lstringcopy_unalign6_copy7bytes + lsrs ip, r3, #24 + stmia r0!, {r2, r3} +#if defined(STPCPY) + beq .Lstringcopy_finish +#else + beq .Lstringcopy_unalign_return +#endif + b .Lstringcopy_unalign6 + +.Lstringcopy_unalign6_copy7bytes: + stmia r0!, {r2} + strh r3, [r0], #2 + lsr r3, #16 + strb r3, [r0] + m_ret inst=pop + + .p2align 2 + // Can read 5 bytes before possibly crossing a page. +.Lstringcopy_unalign5: + ldr r2, [r1], #4 + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldrb r4, [r1] + cbz r4, .Lstringcopy_unalign_copy5bytes + + ldr r3, [r1], #4 + + pld [r1, #64] + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_unalign5 + +.Lstringcopy_unalign_copy5bytes: + stmia r0!, {r2} + strb r4, [r0] + m_ret inst=pop + +.Lstringcopy_unalign_copy6bytes: + stmia r0!, {r2} + strb r4, [r0], #1 + strb r5, [r0] + m_ret inst=pop + + .p2align 2 + // Can read 4 bytes before possibly crossing a page. +.Lstringcopy_unalign4: + ldmia r1!, {r2} + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + ldmia r1!, {r3} + pld [r1, #64] + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_unalign4 + + .p2align 2 + // Can read 3 bytes before possibly crossing a page. +.Lstringcopy_unalign3: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign3_copy1byte + ldrb r3, [r1, #1] + cbz r3, .Lstringcopy_unalign3_copy2bytes + ldrb r4, [r1, #2] + cbz r4, .Lstringcopy_unalign3_copy3bytes + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + + pld [r1, #64] + + lsrs lr, r2, #24 + beq .Lstringcopy_unalign_copy4bytes + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_unalign3 + +.Lstringcopy_unalign3_copy1byte: + strb r2, [r0] + m_ret inst=pop + +.Lstringcopy_unalign3_copy2bytes: + strb r2, [r0], #1 + strb r3, [r0] + m_ret inst=pop + +.Lstringcopy_unalign3_copy3bytes: + strb r2, [r0], #1 + strb r3, [r0], #1 + strb r4, [r0] + m_ret inst=pop + + .p2align 2 + // Can read 2 bytes before possibly crossing a page. +.Lstringcopy_unalign2: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign_copy1byte + ldrb r3, [r1, #1] + cbz r3, .Lstringcopy_unalign_copy2bytes + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + pld [r1, #64] + + tst r2, #0xff0000 + beq .Lstringcopy_unalign_copy3bytes + lsrs ip, r2, #24 + beq .Lstringcopy_unalign_copy4bytes + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_unalign2 + + .p2align 2 + // Can read 1 byte before possibly crossing a page. +.Lstringcopy_unalign1: + ldrb r2, [r1] + cbz r2, .Lstringcopy_unalign_copy1byte + + ldr r2, [r1], #4 + ldr r3, [r1], #4 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .Lstringcopy_zero_in_second_register + + stmia r0!, {r2, r3} + b .Lstringcopy_unalign1 + +.Lstringcopy_unalign_copy1byte: + strb r2, [r0] + m_ret inst=pop + +.Lstringcopy_unalign_copy2bytes: + strb r2, [r0], #1 + strb r3, [r0] + m_ret inst=pop + +.Lstringcopy_unalign_copy3bytes: + strh r2, [r0], #2 + lsr r2, #16 + strb r2, [r0] + m_ret inst=pop + +.Lstringcopy_unalign_copy4bytes: + stmia r0, {r2} +#if defined(STPCPY) + add r0, r0, #3 +#endif + m_ret inst=pop +#if defined(STPCPY) +END(stpcpy) +#else +END(strcpy) +#endif diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk index 9b99387..c82db3b 100644 --- a/libc/arch-arm/cortex-a9/cortex-a9.mk +++ b/libc/arch-arm/cortex-a9/cortex-a9.mk @@ -1,10 +1,11 @@ libc_bionic_src_files_arm += \ arch-arm/cortex-a9/bionic/memcpy.S \ arch-arm/cortex-a9/bionic/memset.S \ + arch-arm/cortex-a9/bionic/stpcpy.S \ arch-arm/cortex-a9/bionic/strcat.S \ + arch-arm/cortex-a9/bionic/__strcat_chk.S \ arch-arm/cortex-a9/bionic/strcmp.S \ arch-arm/cortex-a9/bionic/strcpy.S \ - arch-arm/cortex-a9/bionic/strlen.S \ - arch-arm/cortex-a9/bionic/__strcat_chk.S \ arch-arm/cortex-a9/bionic/__strcpy_chk.S \ + arch-arm/cortex-a9/bionic/strlen.S \ bionic/memmove.c \ diff --git a/libc/arch-arm/denver/bionic/__strcat_chk.S b/libc/arch-arm/denver/bionic/__strcat_chk.S index 36da2d9..a2e9c22 100644 --- a/libc/arch-arm/denver/bionic/__strcat_chk.S +++ b/libc/arch-arm/denver/bionic/__strcat_chk.S @@ -40,12 +40,10 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} - .save {r4, r5} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 @@ -195,9 +193,6 @@ END(__strcat_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcat_chk_failed) - .save {r0, lr} - .save {r4, r5} - .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/denver/bionic/__strcpy_chk.S b/libc/arch-arm/denver/bionic/__strcpy_chk.S index c3e3e14..db76686 100644 --- a/libc/arch-arm/denver/bionic/__strcpy_chk.S +++ b/libc/arch-arm/denver/bionic/__strcpy_chk.S @@ -39,7 +39,6 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -161,7 +160,6 @@ END(__strcpy_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcpy_chk_failed) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/denver/bionic/memcpy.S b/libc/arch-arm/denver/bionic/memcpy.S index da4f3dd..410b663 100644 --- a/libc/arch-arm/denver/bionic/memcpy.S +++ b/libc/arch-arm/denver/bionic/memcpy.S @@ -72,7 +72,6 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #64] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -85,7 +84,6 @@ END(memcpy) ENTRY_PRIVATE(__memcpy_chk_fail) // Preserve lr for backtrace. push {lr} - .save {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk index 6989187..0bc52a2 100644 --- a/libc/arch-arm/denver/denver.mk +++ b/libc/arch-arm/denver/denver.mk @@ -7,7 +7,8 @@ libc_bionic_src_files_arm += \ # Use cortex-a15 versions of strcat/strcpy/strlen. libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/stpcpy.S \ arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ arch-arm/cortex-a15/bionic/strcpy.S \ arch-arm/cortex-a15/bionic/strlen.S \ - arch-arm/cortex-a15/bionic/strcmp.S \ diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S index cd4a13d..b0c79ab 100644 --- a/libc/arch-arm/generic/bionic/memcpy.S +++ b/libc/arch-arm/generic/bionic/memcpy.S @@ -39,7 +39,7 @@ ENTRY(__memcpy_chk) cmp r2, r3 - bgt fortify_check_failed + bhi __memcpy_chk_fail // Fall through to memcpy... END(__memcpy_chk) @@ -49,11 +49,14 @@ ENTRY(memcpy) * ARM ABI. Since we have to save R0, we might as well save R4 * which we can use for better pipelining of the reads below */ - .save {r0, r4, lr} stmfd sp!, {r0, r4, lr} + .cfi_def_cfa_offset 12 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r4, 4 + .cfi_rel_offset lr, 8 /* Making room for r5-r11 which will be spilled later */ - .pad #28 sub sp, sp, #28 + .cfi_adjust_cfa_offset 28 // preload the destination because we'll align it to a cache line // with small writes. Also start the source "pump". @@ -63,14 +66,14 @@ ENTRY(memcpy) /* it simplifies things to take care of len<4 early */ cmp r2, #4 - blo copy_last_3_and_return + blo .Lcopy_last_3_and_return /* compute the offset to align the source * offset = (4-(src&3))&3 = -src & 3 */ rsb r3, r1, #0 ands r3, r3, #3 - beq src_aligned + beq .Lsrc_aligned /* align source to 32 bits. We need to insert 2 instructions between * a ldr[b|h] and str[b|h] because byte and half-word instructions @@ -85,12 +88,12 @@ ENTRY(memcpy) strcsb r4, [r0], #1 strcsb r12,[r0], #1 -src_aligned: +.Lsrc_aligned: /* see if src and dst are aligned together (congruent) */ eor r12, r0, r1 tst r12, #3 - bne non_congruent + bne .Lnon_congruent /* Use post-incriment mode for stm to spill r5-r11 to reserved stack * frame. Don't update sp. @@ -100,7 +103,7 @@ src_aligned: /* align the destination to a cache-line */ rsb r3, r0, #0 ands r3, r3, #0x1C - beq congruent_aligned32 + beq .Lcongruent_aligned32 cmp r3, r2 andhi r3, r2, #0x1C @@ -115,14 +118,14 @@ src_aligned: strne r10,[r0], #4 sub r2, r2, r3 -congruent_aligned32: +.Lcongruent_aligned32: /* * here source is aligned to 32 bytes. */ -cached_aligned32: +.Lcached_aligned32: subs r2, r2, #32 - blo less_than_32_left + blo .Lless_than_32_left /* * We preload a cache-line up to 64 bytes ahead. On the 926, this will @@ -160,10 +163,7 @@ cached_aligned32: add r2, r2, #32 - - - -less_than_32_left: +.Lless_than_32_left: /* * less than 32 bytes left at this point (length in r2) */ @@ -197,7 +197,7 @@ less_than_32_left: /********************************************************************/ -non_congruent: +.Lnon_congruent: /* * here source is aligned to 4 bytes * but destination is not. @@ -207,9 +207,9 @@ non_congruent: * partial words in the shift queue) */ cmp r2, #4 - blo copy_last_3_and_return + blo .Lcopy_last_3_and_return - /* Use post-incriment mode for stm to spill r5-r11 to reserved stack + /* Use post-increment mode for stm to spill r5-r11 to reserved stack * frame. Don't update sp. */ stmea sp, {r5-r11} @@ -236,7 +236,7 @@ non_congruent: movcs r3, r3, lsr #8 cmp r2, #4 - blo partial_word_tail + blo .Lpartial_word_tail /* Align destination to 32 bytes (cache line boundary) */ 1: tst r0, #0x1c @@ -248,11 +248,11 @@ non_congruent: str r4, [r0], #4 cmp r2, #4 bhs 1b - blo partial_word_tail + blo .Lpartial_word_tail /* copy 32 bytes at a time */ 2: subs r2, r2, #32 - blo less_than_thirtytwo + blo .Lless_than_thirtytwo /* Use immediate mode for the shifts, because there is an extra cycle * for register shifts, which could account for up to 50% of @@ -260,11 +260,11 @@ non_congruent: */ cmp r12, #24 - beq loop24 + beq .Lloop24 cmp r12, #8 - beq loop8 + beq .Lloop8 -loop16: +.Lloop16: ldr r12, [r1], #4 1: mov r4, r12 ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} @@ -289,9 +289,9 @@ loop16: stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #16 bhs 1b - b less_than_thirtytwo + b .Lless_than_thirtytwo -loop8: +.Lloop8: ldr r12, [r1], #4 1: mov r4, r12 ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} @@ -316,9 +316,9 @@ loop8: stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #8 bhs 1b - b less_than_thirtytwo + b .Lless_than_thirtytwo -loop24: +.Lloop24: ldr r12, [r1], #4 1: mov r4, r12 ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} @@ -345,12 +345,12 @@ loop24: bhs 1b -less_than_thirtytwo: +.Lless_than_thirtytwo: /* copy the last 0 to 31 bytes of the source */ rsb r12, lr, #32 /* we corrupted r12, recompute it */ add r2, r2, #32 cmp r2, #4 - blo partial_word_tail + blo .Lpartial_word_tail 1: ldr r5, [r1], #4 sub r2, r2, #4 @@ -360,7 +360,7 @@ less_than_thirtytwo: cmp r2, #4 bhs 1b -partial_word_tail: +.Lpartial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) strmib r3, [r0], #1 @@ -372,7 +372,7 @@ partial_word_tail: /* Refill spilled registers from the stack. Don't update sp. */ ldmfd sp, {r5-r11} -copy_last_3_and_return: +.Lcopy_last_3_and_return: movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ ldrmib r2, [r1], #1 ldrcsb r3, [r1], #1 @@ -385,9 +385,15 @@ copy_last_3_and_return: add sp, sp, #28 ldmfd sp!, {r0, r4, lr} bx lr +END(memcpy) // Only reached when the __memcpy_chk check fails. -fortify_check_failed: +ENTRY_PRIVATE(__memcpy_chk_fail) + // Preserve lr for backtrace. + push {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + ldr r0, error_message ldr r1, error_code 1: @@ -397,7 +403,7 @@ error_code: .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW error_message: .word error_string-(1b+8) -END(memcpy) +END(__memcpy_chk_fail) .data error_string: diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk index 2456e6e..95be867 100644 --- a/libc/arch-arm/generic/generic.mk +++ b/libc/arch-arm/generic/generic.mk @@ -7,4 +7,5 @@ libc_bionic_src_files_arm += \ bionic/memmove.c \ bionic/__strcat_chk.cpp \ bionic/__strcpy_chk.cpp \ + upstream-openbsd/lib/libc/string/stpcpy.c \ upstream-openbsd/lib/libc/string/strcat.c \ diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S index 34becdb..246f159 100644 --- a/libc/arch-arm/krait/bionic/__strcat_chk.S +++ b/libc/arch-arm/krait/bionic/__strcat_chk.S @@ -40,12 +40,10 @@ ENTRY(__strcat_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 push {r4, r5} - .save {r4, r5} .cfi_adjust_cfa_offset 8 .cfi_rel_offset r4, 0 .cfi_rel_offset r5, 4 @@ -194,8 +192,6 @@ END(__strcat_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcat_chk_failed) - .save {r0, lr} - .save {r4, r5} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S index c3e3e14..db76686 100644 --- a/libc/arch-arm/krait/bionic/__strcpy_chk.S +++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S @@ -39,7 +39,6 @@ ENTRY(__strcpy_chk) pld [r0, #0] push {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -161,7 +160,6 @@ END(__strcpy_chk) #include "memcpy_base.S" ENTRY_PRIVATE(__strcpy_chk_failed) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S index 0b7b276..9ff46a8 100644 --- a/libc/arch-arm/krait/bionic/memcpy.S +++ b/libc/arch-arm/krait/bionic/memcpy.S @@ -53,7 +53,6 @@ END(__memcpy_chk) ENTRY(memcpy) pld [r1, #64] stmfd sp!, {r0, lr} - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 @@ -66,7 +65,6 @@ END(memcpy) ENTRY_PRIVATE(__memcpy_chk_fail) // Preserve lr for backtrace. push {lr} - .save {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S index 99fc255..035dcf1 100644 --- a/libc/arch-arm/krait/bionic/memcpy_base.S +++ b/libc/arch-arm/krait/bionic/memcpy_base.S @@ -36,7 +36,6 @@ // Assumes neon instructions and a cache line size of 32 bytes. ENTRY_PRIVATE(MEMCPY_BASE) - .save {r0, lr} .cfi_def_cfa_offset 8 .cfi_rel_offset r0, 0 .cfi_rel_offset lr, 4 diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S index 5d1943b..e9f6431 100644 --- a/libc/arch-arm/krait/bionic/memset.S +++ b/libc/arch-arm/krait/bionic/memset.S @@ -43,7 +43,6 @@ ENTRY(__memset_chk) bls .L_done // Preserve lr for backtrace. - .save {lr} push {lr} .cfi_def_cfa_offset 4 .cfi_rel_offset lr, 0 @@ -69,7 +68,6 @@ END(bzero) /* memset() returns its first argument. */ ENTRY(memset) - .save {r0} stmfd sp!, {r0} .cfi_def_cfa_offset 4 .cfi_rel_offset r0, 0 diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S index eacb82a..9121c01 100644 --- a/libc/arch-arm/krait/bionic/strcmp.S +++ b/libc/arch-arm/krait/bionic/strcmp.S @@ -168,7 +168,6 @@ ENTRY(strcmp) bne .L_do_align /* Fast path. */ - .save {r4-r7} init .L_doubleword_aligned: diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk index 631ab68..1bb7b0a 100644 --- a/libc/arch-arm/krait/krait.mk +++ b/libc/arch-arm/krait/krait.mk @@ -7,6 +7,7 @@ libc_bionic_src_files_arm += \ # Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/stpcpy.S \ arch-arm/cortex-a15/bionic/strcat.S \ arch-arm/cortex-a15/bionic/strcpy.S \ arch-arm/cortex-a15/bionic/strlen.S \ |