-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcat_chk.S  | 29
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S  | 21
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/memcpy.S        | 15
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/memcpy_base.S   | 40
-rw-r--r--  libc/arch-arm/cortex-a15/bionic/strcmp.S        | 95
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/__strcat_chk.S   | 29
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S   | 25
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/memcpy.S         | 12
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/memcpy_base.S    | 32
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/memset.S         | 20
-rw-r--r--  libc/arch-arm/cortex-a9/bionic/strcmp.S         | 94
-rw-r--r--  libc/arch-arm/krait/bionic/__strcat_chk.S       | 27
-rw-r--r--  libc/arch-arm/krait/bionic/__strcpy_chk.S       | 21
-rw-r--r--  libc/arch-arm/krait/bionic/memcpy.S             | 15
-rw-r--r--  libc/arch-arm/krait/bionic/memcpy_base.S        | 10
-rw-r--r--  libc/arch-arm/krait/bionic/strcmp.S             | 97
16 files changed, 350 insertions(+), 232 deletions(-)
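
The recurring fix across all 16 files: the old sources emitted the ARM EABI .save directive before its push/stmfd, mixed .pad and .cfi_restore hacks into fall-through paths, and spliced memcpy_base.S into the middle of other functions with #include, all of which left the unwinder with wrong frame data. The rewrite places each annotation with the instruction it describes and gives every code path its own ENTRY/END pair. A minimal sketch of the corrected prologue shape, assuming bionic's ENTRY/END assembler macros; the function name is illustrative and not part of the patch:

ENTRY(example_push_pop)
    .cfi_startproc
    push    {r0, lr}            // Save caller state first...
    .save   {r0, lr}            // ...then emit the EABI unwind annotation for it.
    .cfi_def_cfa_offset 8       // sp moved down by 8 bytes.
    .cfi_rel_offset r0, 0       // r0 saved at [sp, #0].
    .cfi_rel_offset lr, 4       // lr saved at [sp, #4].

    // ... function body ...

    pop     {r0, pc}            // Restore r0 and return in one instruction.
    .cfi_endproc
END(example_push_pop)
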
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
index 4b125c8..4693600 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
@@ -40,13 +40,13 @@ ENTRY(__strcat_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
-    .save   {r4, r5}
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -180,22 +180,31 @@ ENTRY(__strcat_chk)
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bgt     __strcat_chk_failed
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
     pld     [r1, #64]
     mov     r2, r4
     add     r0, r0, r3
-    .pad    #-8
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcat_chk)
+
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+
+#include "memcpy_base.S"
 
-.L_fortify_check_failed:
+ENTRY(__strcat_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .save   {r4, r5}
+
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -211,7 +220,7 @@ error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
index a045816..1224b49 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
@@ -39,8 +39,8 @@ ENTRY(__strcpy_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -151,14 +151,25 @@ ENTRY(__strcpy_chk)
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bge     __strcpy_chk_failed
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcpy_chk)
+
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
 
-.L_fortify_check_failed:
     ldr     r0, error_message
     ldr     r1, error_code
 1:
@@ -170,7 +181,7 @@ error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index 16881d4..a300e43 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -74,23 +74,24 @@ END(__memcpy_chk)
 ENTRY(memcpy)
     .cfi_startproc
     pld     [r1, #64]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
-    #include "memcpy_base.S"
-
     .cfi_endproc
 END(memcpy)
 
-    .fnstart
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
     .cfi_startproc
-__memcpy_chk_fail:
     // Preserve lr for backtrace.
-    .save   {lr}
     push    {lr}
+    .save   {lr}
     .cfi_def_cfa_offset 4
     .cfi_rel_offset lr, 0
@@ -104,7 +105,7 @@ error_code:
 error_message:
     .word   error_string-(1b+8)
 
     .cfi_endproc
-    .fnend
+END(__memcpy_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
index 647e065..0154676 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
@@ -53,6 +53,13 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+ENTRY(MEMCPY_BASE)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
 // Assumes that n >= 0, and dst, src are valid pointers.
 // For any sizes less than 832 use the neon code that doesn't
 // care about the src alignment. This avoids any checks
@@ -162,20 +169,34 @@
     ands    r3, r3, #0x3
     bne     .L_copy_unknown_alignment
 
+    .cfi_endproc
+END(MEMCPY_BASE)
+
+ENTRY(MEMCPY_BASE_ALIGNED)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     // To try and improve performance, stack layout changed,
     // i.e., not keeping the stack looking like users expect
     // (highest numbered register at highest address).
-    // TODO: Add debug frame directives.
-    // We don't need exception unwind directives, because the code below
-    // does not throw any exceptions and does not call any other functions.
-    // Generally, newlib functions like this lack debug information for
-    // assembler source.
-    .save   {r4, r5}
     strd    r4, r5, [sp, #-8]!
-    .save   {r6, r7}
+    .save   {r4, r5}
+    .cfi_adjust_cfa_offset 8
+    .cfi_rel_offset r4, 0
+    .cfi_rel_offset r5, 4
     strd    r6, r7, [sp, #-8]!
-    .save   {r8, r9}
+    .save   {r6, r7}
+    .cfi_adjust_cfa_offset 8
+    .cfi_rel_offset r6, 0
+    .cfi_rel_offset r7, 0
     strd    r8, r9, [sp, #-8]!
+    .save   {r8, r9}
+    .cfi_adjust_cfa_offset 8
+    .cfi_rel_offset r8, 0
+    .cfi_rel_offset r9, 4
 
     // Optimized for already aligned dst code.
     ands    ip, r0, #3
@@ -301,3 +322,6 @@
 
     // Src is guaranteed to be at least word aligned by this point.
     b       .L_word_aligned
+
+    .cfi_endproc
+END(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S
index 2719bf7..13b329f 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S
@@ -122,7 +122,6 @@ ENTRY(strcmp)
 
     .macro init
     /* Macro to save temporary registers and prepare magic values.  */
-    .save   {r4-r7}
     subs    sp, sp, #16
     .cfi_def_cfa_offset 16
     strd    r4, r5, [sp, #8]
@@ -178,12 +177,13 @@ ENTRY(strcmp)
     /* Are both strings double-word aligned?  */
     orr     ip, r0, r1
     tst     ip, #7
-    bne     do_align
+    bne     .L_do_align
 
     /* Fast path.  */
+    .save   {r4-r7}
     init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
     /* Get here when the strings to compare are double-word aligned.  */
     /* Compare two words in every iteration.  */
@@ -196,14 +196,14 @@ doubleword_aligned:
     ldrd    r2, r3, [r0], #8
     ldrd    r4, r5, [r1], #8
 
-    magic_compare_and_branch w1=r2, w2=r4, label=return_24
-    magic_compare_and_branch w1=r3, w2=r5, label=return_35
+    magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+    magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
     b       2b
 
-do_align:
+.L_do_align:
     /* Is the first string word-aligned?  */
     ands    ip, r0, #3
-    beq     word_aligned_r0
+    beq     .L_word_aligned_r0
 
     /* Fast compare byte by byte until the first string is word-aligned.  */
     /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +211,58 @@ do_align:
     bic     r0, r0, #3
     ldr     r2, [r0], #4
     lsls    ip, ip, #31
-    beq     byte2
-    bcs     byte3
+    beq     .L_byte2
+    bcs     .L_byte3
 
-byte1:
+.L_byte1:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE1_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE2_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE3_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbnz  reg=r3, label=word_aligned_r0
+    bne     .L_fast_return
+    m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
     mov     r0, ip
     bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
     init
     /* The first string is word-aligned.  */
     /* Is the second string word-aligned?  */
     ands    ip, r1, #3
-    bne     strcmp_unaligned
+    bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
     /* The strings are word-aligned. */
     /* Is the first string double-word aligned?  */
     tst     r0, #4
-    beq     doubleword_aligned_r0
+    beq     .L_doubleword_aligned_r0
 
     /* If r0 is not double-word aligned yet, align it by loading
     and comparing the next word from each string.  */
     ldr     r2, [r0], #4
     ldr     r4, [r1], #4
-    magic_compare_and_branch w1=r2 w2=r4 label=return_24
+    magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
     /* Get here when r0 is double-word aligned.  */
     /* Is r1 doubleword_aligned?  */
     tst     r1, #4
-    beq     doubleword_aligned
+    beq     .L_doubleword_aligned
 
     /* Get here when the strings to compare are word-aligned,
     r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -278,9 +278,9 @@ doubleword_aligned_r0:
 
     /* Load the next double-word from each string and compare.  */
     ldrd    r2, r3, [r0], #8
-    magic_compare_and_branch w1=r2 w2=r5 label=return_25
+    magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
     ldrd    r4, r5, [r1], #8
-    magic_compare_and_branch w1=r3 w2=r4 label=return_34
+    magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
     b       3b
 
 .macro miscmp_word offsetlo offsethi
@@ -304,47 +304,47 @@ doubleword_aligned_r0:
     and     r2, r3, r6, S2LOMEM #\offsetlo
     it      eq
     cmpeq   r2, r5
-    bne     return_25
+    bne     .L_return_25
     ldr     r5, [r1], #4
     cmp     ip, #0
     eor     r3, r2, r3
     S2HIMEM r2, r5, #\offsethi
     it      eq
     cmpeq   r3, r2
-    bne     return_32
+    bne     .L_return_32
     b       7b
 .endm /* miscmp_word */
 
-strcmp_unaligned:
+.L_strcmp_unaligned:
     /* r0 is word-aligned, r1 is at offset ip from a word.  */
     /* Align r1 to the (previous) word-boundary.  */
     bic     r1, r1, #3
 
     /* Unaligned comparison word by word using LDRs.  */
     cmp     ip, #2
-    beq     miscmp_word_16                    /* If ip == 2.  */
-    bge     miscmp_word_24                    /* If ip == 3.  */
+    beq     .L_miscmp_word_16                 /* If ip == 2.  */
+    bge     .L_miscmp_word_24                 /* If ip == 3.  */
     miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
-miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
+.L_miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
+.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
 
-return_32:
+.L_return_32:
     setup_return w1=r3, w2=r2
-    b       do_return
-return_34:
+    b       .L_do_return
+.L_return_34:
     setup_return w1=r3, w2=r4
-    b       do_return
-return_25:
+    b       .L_do_return
+.L_return_25:
     setup_return w1=r2, w2=r5
-    b       do_return
-return_35:
+    b       .L_do_return
+.L_return_35:
     setup_return w1=r3, w2=r5
-    b       do_return
-return_24:
+    b       .L_do_return
+.L_return_24:
     setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 #ifdef __ARMEB__
     mov     r0, ip
@@ -355,7 +355,6 @@ do_return:
     /* Restore temporaries early, before computing the return value.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -366,7 +365,7 @@ do_return:
     /* There is a zero or a different byte between r1 and r2.  */
     /* r0 contains a mask of all-zero bytes in r1.  */
     /* Using r0 and not ip here because cbz requires low register.  */
-    m_cbz   reg=r0, label=compute_return_value
+    m_cbz   reg=r0, label=.L_compute_return_value
     clz     r0, r0
     /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
     rsb     r0, r0, #24
@@ -374,7 +373,7 @@ do_return:
     lsr     r1, r1, r0
     lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
     movs    r0, #1
     cmp     r1, r2
     /* The return value is computed as follows.
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
index 1329c16..cc43456 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
@@ -40,13 +40,13 @@ ENTRY(__strcat_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
-    .save   {r4, r5}
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -183,22 +183,31 @@ ENTRY(__strcat_chk)
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bgt     __strcat_chk_fail
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
     pld     [r1, #64]
     mov     r2, r4
     add     r0, r0, r3
-    .pad    #-8
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
-    #include "memcpy_base.S"
+
+    // Fall through into the memcpy_base function.
+    .cfi_endproc
+END(__strcat_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcat_chk_fail)
+    .cfi_startproc
+
+    .save   {r0, lr}
+    .save   {r4, r5}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -214,7 +223,7 @@ error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
index b697c1f..dd3370b 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
@@ -39,8 +39,8 @@ ENTRY(__strcpy_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -153,26 +153,41 @@ ENTRY(__strcpy_chk)
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bge     __strcpy_chk_fail
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
-    #include "memcpy_base.S"
+    .cfi_endproc
+
+    // Fall through into the memcpy_base function.
+END(__strcpy_chk)
+
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_fail)
+    .cfi_startproc
+
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
 
-.L_fortify_check_failed:
     ldr     r0, error_message
     ldr     r1, error_code
 1:
     add     r0, pc
     bl      __fortify_chk_fail
+
 error_code:
     .word   BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
 error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy.S b/libc/arch-arm/cortex-a9/bionic/memcpy.S
index ab3b0a0..21e0ebe 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy.S
@@ -51,22 +51,27 @@ END(__memcpy_chk)
 ENTRY(memcpy)
     .cfi_startproc
+    pld     [r1, #0]
-    .save   {r0, lr}
     stmfd   sp!, {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
 
     pld     [r1, #64]
-    #include "memcpy_base.S"
 
     .cfi_endproc
 END(memcpy)
 
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
     .cfi_startproc
-__memcpy_chk_fail:
     // Preserve lr for backtrace.
     push    {lr}
+    .save   {lr}
     .cfi_def_cfa_offset 4
     .cfi_rel_offset lr, 0
@@ -80,6 +85,7 @@ error_code:
 error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
+END(__memcpy_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index 088d29e..e8ff4f5 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -32,15 +32,21 @@
  * cache line.
  */
 
+ENTRY(MEMCPY_BASE)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     // Check so divider is at least 16 bytes, needed for alignment code.
     cmp     r2, #16
     blo     5f
-
     /* check if buffers are aligned. If so, run arm-only version */
     eor     r3, r0, r1
     ands    r3, r3, #0x3
-    beq     11f
+    beq     __memcpy_base_aligned
 
     /* Check the upper size limit for Neon unaligned memory access in memcpy */
     cmp     r2, #224
@@ -131,11 +137,22 @@
     ldmfd   sp!, {r0, lr}
     bx      lr
 
-11:
+
+    .cfi_endproc
+END(MEMCPY_BASE)
+
+ENTRY(MEMCPY_BASE_ALIGNED)
+    .cfi_startproc
+
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     /* Simple arm-only copy loop to handle aligned copy operations */
-    .save   {r4-r8}
     stmfd   sp!, {r4-r8}
-    .cfi_def_cfa_offset 20
+    .save   {r4-r8}
+    .cfi_adjust_cfa_offset 20
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
     .cfi_rel_offset r6, 8
@@ -209,5 +226,8 @@
     ldrbne  r3, [r1]                /* last byte */
     strbne  r3, [r0]
 6:
-    ldmfd   sp!, {r4, r5, r6, r7, r8}
+    ldmfd   sp!, {r4-r8}
     ldmfd   sp!, {r0, pc}
+
+    .cfi_endproc
+END(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index a7876fb..87d2c08 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -43,8 +43,8 @@ ENTRY(__memset_chk)
     bls     .L_done
 
     // Preserve lr for backtrace.
-    .save   {lr}
     push    {lr}
+    .save   {lr}
     .cfi_def_cfa_offset 4
     .cfi_rel_offset lr, 0
@@ -74,12 +74,13 @@ END(bzero)
 /* memset() returns its first argument.  */
 ENTRY(memset)
     .cfi_startproc
+
     # The neon memset only wins for less than 132.
     cmp     r2, #132
-    bhi     11f
+    bhi     __memset_large_copy
 
-    .save   {r0}
     stmfd   sp!, {r0}
+    .save   {r0}
     .cfi_def_cfa_offset 4
     .cfi_rel_offset r0, 0
@@ -114,13 +115,18 @@ ENTRY(memset)
     strcsb  r1, [r0], #1
     ldmfd   sp!, {r0}
     bx      lr
-11:
+
+    .cfi_endproc
+END(memset)
+
+ENTRY(__memset_large_copy)
+    .cfi_startproc
+
     /* compute the offset to align the destination
      * offset = (4-(src&3))&3 = -src & 3
      */
-
-    .save   {r0, r4-r7, lr}
     stmfd   sp!, {r0, r4-r7, lr}
+    .save   {r0, r4-r7, lr}
     .cfi_def_cfa_offset 24
     .cfi_rel_offset r0, 0
     .cfi_rel_offset r4, 4
@@ -191,7 +197,7 @@ ENTRY(memset)
     ldmfd   sp!, {r0, r4-r7, lr}
     bx      lr
     .cfi_endproc
-END(memset)
+END(__memset_large_copy)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
index a84c047..232df75 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcmp.S
@@ -122,7 +122,6 @@ ENTRY(strcmp)
 
     .macro init
     /* Macro to save temporary registers and prepare magic values.  */
-    .save   {r4-r7}
     subs    sp, sp, #16
     .cfi_def_cfa_offset 16
     strd    r4, r5, [sp, #8]
@@ -178,12 +177,13 @@ ENTRY(strcmp)
     /* Are both strings double-word aligned?  */
     orr     ip, r0, r1
     tst     ip, #7
-    bne     do_align
+    bne     .L_do_align
 
     /* Fast path.  */
+    .save   {r4-r7}
     init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
     /* Get here when the strings to compare are double-word aligned.  */
     /* Compare two words in every iteration.  */
@@ -196,14 +196,14 @@ doubleword_aligned:
     ldrd    r2, r3, [r0], #8
     ldrd    r4, r5, [r1], #8
 
-    magic_compare_and_branch w1=r2, w2=r4, label=return_24
-    magic_compare_and_branch w1=r3, w2=r5, label=return_35
+    magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+    magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
     b       2b
 
-do_align:
+.L_do_align:
     /* Is the first string word-aligned?  */
     ands    ip, r0, #3
-    beq     word_aligned_r0
+    beq     .L_word_aligned_r0
 
     /* Fast compare byte by byte until the first string is word-aligned.  */
     /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +211,58 @@ do_align:
     bic     r0, r0, #3
     ldr     r2, [r0], #4
     lsls    ip, ip, #31
-    beq     byte2
-    bcs     byte3
+    beq     .L_byte2
+    bcs     .L_byte3
 
-byte1:
+.L_byte1:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE1_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE2_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE3_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbnz  reg=r3, label=word_aligned_r0
+    bne     .L_fast_return
+    m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
     mov     r0, ip
     bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
     init
     /* The first string is word-aligned.  */
     /* Is the second string word-aligned?  */
     ands    ip, r1, #3
-    bne     strcmp_unaligned
+    bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
     /* The strings are word-aligned. */
     /* Is the first string double-word aligned?  */
     tst     r0, #4
-    beq     doubleword_aligned_r0
+    beq     .L_doubleword_aligned_r0
 
     /* If r0 is not double-word aligned yet, align it by loading
     and comparing the next word from each string.  */
     ldr     r2, [r0], #4
     ldr     r4, [r1], #4
-    magic_compare_and_branch w1=r2 w2=r4 label=return_24
+    magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
     /* Get here when r0 is double-word aligned.  */
     /* Is r1 doubleword_aligned?  */
     tst     r1, #4
-    beq     doubleword_aligned
+    beq     .L_doubleword_aligned
 
     /* Get here when the strings to compare are word-aligned,
     r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -278,9 +278,9 @@ doubleword_aligned_r0:
 
     /* Load the next double-word from each string and compare.  */
     ldrd    r2, r3, [r0], #8
-    magic_compare_and_branch w1=r2 w2=r5 label=return_25
+    magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
     ldrd    r4, r5, [r1], #8
-    magic_compare_and_branch w1=r3 w2=r4 label=return_34
+    magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
     b       3b
 
 .macro miscmp_word offsetlo offsethi
@@ -304,33 +304,33 @@ doubleword_aligned_r0:
     and     r2, r3, r6, S2LOMEM #\offsetlo
     it      eq
     cmpeq   r2, r5
-    bne     return_25
+    bne     .L_return_25
     ldr     r5, [r1], #4
     cmp     ip, #0
     eor     r3, r2, r3
     S2HIMEM r2, r5, #\offsethi
     it      eq
     cmpeq   r3, r2
-    bne     return_32
+    bne     .L_return_32
     b       7b
 .endm /* miscmp_word */
 
-return_32:
+.L_return_32:
     setup_return w1=r3, w2=r2
-    b       do_return
-return_34:
+    b       .L_do_return
+.L_return_34:
     setup_return w1=r3, w2=r4
-    b       do_return
-return_25:
+    b       .L_do_return
+.L_return_25:
     setup_return w1=r2, w2=r5
-    b       do_return
-return_35:
+    b       .L_do_return
+.L_return_35:
     setup_return w1=r3, w2=r5
-    b       do_return
-return_24:
+    b       .L_do_return
+.L_return_24:
     setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 #ifdef __ARMEB__
     mov     r0, ip
@@ -341,7 +341,6 @@ do_return:
     /* Restore temporaries early, before computing the return value.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -352,7 +351,7 @@
     /* There is a zero or a different byte between r1 and r2.  */
     /* r0 contains a mask of all-zero bytes in r1.  */
     /* Using r0 and not ip here because cbz requires low register.  */
-    m_cbz   reg=r0, label=compute_return_value
+    m_cbz   reg=r0, label=.L_compute_return_value
     clz     r0, r0
     /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
     rsb     r0, r0, #24
@@ -360,7 +359,7 @@
     lsr     r1, r1, r0
     lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
     movs    r0, #1
     cmp     r1, r2
     /* The return value is computed as follows.
@@ -380,7 +379,7 @@ compute_return_value:
  * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
  * version of the code.
  */
-strcmp_unaligned:
+.L_strcmp_unaligned:
     wp1 .req r0
     wp2 .req r1
     b1  .req r2
@@ -532,7 +531,6 @@ strcmp_unaligned:
     /* Restore registers and stack.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -557,13 +555,7 @@ strcmp_unaligned:
     /* Restore registers and stack.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
 
     bx      lr
     .cfi_endproc
diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S
index 4b125c8..ec99077 100644
--- a/libc/arch-arm/krait/bionic/__strcat_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcat_chk.S
@@ -40,13 +40,13 @@ ENTRY(__strcat_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
-    .save   {r4, r5}
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -180,22 +180,29 @@ ENTRY(__strcat_chk)
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bgt     __strcat_chk_failed
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
     pld     [r1, #64]
     mov     r2, r4
     add     r0, r0, r3
-    .pad    #-8
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcat_chk)
+
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+#include "memcpy_base.S"
 
-.L_fortify_check_failed:
+ENTRY(__strcat_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .save   {r4, r5}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -211,7 +218,7 @@ error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S
index d5e1db6..7da4d15 100644
--- a/libc/arch-arm/krait/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S
@@ -39,8 +39,8 @@ ENTRY(__strcpy_chk)
     .cfi_startproc
     pld     [r0, #0]
-    .save   {r0, lr}
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -151,14 +151,25 @@ ENTRY(__strcpy_chk)
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bge     __strcpy_chk_failed
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcpy_chk)
+
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
 
-.L_fortify_check_failed:
     ldr     r0, error_message
     ldr     r1, error_code
 1:
@@ -169,7 +180,7 @@ error_code:
 error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S
index 093a244..9072408 100644
--- a/libc/arch-arm/krait/bionic/memcpy.S
+++ b/libc/arch-arm/krait/bionic/memcpy.S
@@ -55,22 +55,23 @@ END(__memcpy_chk)
 ENTRY(memcpy)
     .cfi_startproc
     pld     [r1, #64]
-    .save   {r0, lr}
     stmfd   sp!, {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
-
-    #include "memcpy_base.S"
 
     .cfi_endproc
 END(memcpy)
 
-    .fnstart
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
     .cfi_startproc
-__memcpy_chk_fail:
     // Preserve lr for backtrace.
-    .save   {lr}
     push    {lr}
+    .save   {lr}
     .cfi_def_cfa_offset 4
     .cfi_rel_offset lr, 0
@@ -84,7 +85,7 @@ error_code:
 error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-    .fnend
+END(__memcpy_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 48ce477..d87a542 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -35,6 +35,13 @@
 
 // Assumes neon instructions and a cache line size of 32 bytes.
 
+ENTRY(MEMCPY_BASE)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     /* do we have at least 16-bytes to copy (needed for alignment below) */
     cmp     r2, #16
     blo     5f
@@ -115,3 +122,6 @@
 
     ldmfd   sp!, {r0, lr}
     bx      lr
+
+    .cfi_endproc
+END(MEMCPY_BASE)
diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S
index f26aaf1..d4cf3f4 100644
--- a/libc/arch-arm/krait/bionic/strcmp.S
+++ b/libc/arch-arm/krait/bionic/strcmp.S
@@ -122,7 +122,6 @@ ENTRY(strcmp)
 
     .macro init
     /* Macro to save temporary registers and prepare magic values.  */
-    .save   {r4-r7}
     subs    sp, sp, #16
     .cfi_def_cfa_offset 16
     strd    r4, r5, [sp, #8]
@@ -178,12 +177,13 @@ ENTRY(strcmp)
     /* Are both strings double-word aligned?  */
     orr     ip, r0, r1
     tst     ip, #7
-    bne     do_align
+    bne     .L_do_align
 
     /* Fast path.  */
+    .save   {r4-r7}
     init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
     /* Get here when the strings to compare are double-word aligned.  */
     /* Compare two words in every iteration.  */
@@ -196,14 +196,14 @@ doubleword_aligned:
     ldrd    r2, r3, [r0], #8
     ldrd    r4, r5, [r1], #8
 
-    magic_compare_and_branch w1=r2, w2=r4, label=return_24
-    magic_compare_and_branch w1=r3, w2=r5, label=return_35
+    magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+    magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
     b       2b
 
-do_align:
+.L_do_align:
     /* Is the first string word-aligned?  */
     ands    ip, r0, #3
-    beq     word_aligned_r0
+    beq     .L_word_aligned_r0
 
     /* Fast compare byte by byte until the first string is word-aligned.  */
     /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +211,58 @@ do_align:
     bic     r0, r0, #3
     ldr     r2, [r0], #4
     lsls    ip, ip, #31
-    beq     byte2
-    bcs     byte3
+    beq     .L_byte2
+    bcs     .L_byte3
 
-byte1:
+.L_byte1:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE1_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE2_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbz   reg=r3, label=fast_return
+    bne     .L_fast_return
+    m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
     ldrb    ip, [r1], #1
     uxtb    r3, r2, ror #BYTE3_OFFSET
     subs    ip, r3, ip
-    bne     fast_return
-    m_cbnz  reg=r3, label=word_aligned_r0
+    bne     .L_fast_return
+    m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
     mov     r0, ip
     bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
     init
     /* The first string is word-aligned.  */
     /* Is the second string word-aligned?  */
     ands    ip, r1, #3
-    bne     strcmp_unaligned
+    bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
     /* The strings are word-aligned. */
     /* Is the first string double-word aligned?  */
     tst     r0, #4
-    beq     doubleword_aligned_r0
+    beq     .L_doubleword_aligned_r0
 
     /* If r0 is not double-word aligned yet, align it by loading
     and comparing the next word from each string.  */
     ldr     r2, [r0], #4
     ldr     r4, [r1], #4
-    magic_compare_and_branch w1=r2 w2=r4 label=return_24
+    magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
     /* Get here when r0 is double-word aligned.  */
     /* Is r1 doubleword_aligned?  */
     tst     r1, #4
-    beq     doubleword_aligned
+    beq     .L_doubleword_aligned
 
     /* Get here when the strings to compare are word-aligned,
     r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -278,9 +278,9 @@ doubleword_aligned_r0:
 
     /* Load the next double-word from each string and compare.  */
     ldrd    r2, r3, [r0], #8
-    magic_compare_and_branch w1=r2 w2=r5 label=return_25
+    magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
     ldrd    r4, r5, [r1], #8
-    magic_compare_and_branch w1=r3 w2=r4 label=return_34
+    magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
     b       3b
 
 .macro miscmp_word offsetlo offsethi
@@ -304,46 +304,46 @@ doubleword_aligned_r0:
     and     r2, r3, r6, S2LOMEM #\offsetlo
     it      eq
     cmpeq   r2, r5
-    bne     return_25
+    bne     .L_return_25
     ldr     r5, [r1], #4
     cmp     ip, #0
     eor     r3, r2, r3
     S2HIMEM r2, r5, #\offsethi
     it      eq
     cmpeq   r3, r2
-    bne     return_32
+    bne     .L_return_32
     b       7b
 .endm /* miscmp_word */
 
-strcmp_unaligned:
+.L_strcmp_unaligned:
     /* r0 is word-aligned, r1 is at offset ip from a word.  */
     /* Align r1 to the (previous) word-boundary.  */
     bic     r1, r1, #3
 
     /* Unaligned comparison word by word using LDRs.  */
     cmp     ip, #2
-    beq     miscmp_word_16                    /* If ip == 2.  */
-    bge     miscmp_word_24                    /* If ip == 3.  */
+    beq     .L_miscmp_word_16                 /* If ip == 2.  */
+    bge     .L_miscmp_word_24                 /* If ip == 3.  */
     miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
+.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
 
-return_32:
+.L_return_32:
     setup_return w1=r3, w2=r2
-    b       do_return
-return_34:
+    b       .L_do_return
+.L_return_34:
     setup_return w1=r3, w2=r4
-    b       do_return
-return_25:
+    b       .L_do_return
+.L_return_25:
     setup_return w1=r2, w2=r5
-    b       do_return
-return_35:
+    b       .L_do_return
+.L_return_35:
     setup_return w1=r3, w2=r5
-    b       do_return
-return_24:
+    b       .L_do_return
+.L_return_24:
     setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 #ifdef __ARMEB__
     mov     r0, ip
@@ -354,7 +354,6 @@ do_return:
     /* Restore temporaries early, before computing the return value.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -365,7 +364,7 @@
     /* There is a zero or a different byte between r1 and r2.  */
     /* r0 contains a mask of all-zero bytes in r1.  */
     /* Using r0 and not ip here because cbz requires low register.  */
-    m_cbz   reg=r0, label=compute_return_value
+    m_cbz   reg=r0, label=.L_compute_return_value
     clz     r0, r0
     /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
     rsb     r0, r0, #24
@@ -373,7 +372,7 @@
     lsr     r1, r1, r0
     lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
     movs    r0, #1
     cmp     r1, r2
     /* The return value is computed as follows.
@@ -393,7 +392,7 @@ compute_return_value:
  * previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
 * for the unedited version of this code.
 */
-miscmp_word_16:
+.L_miscmp_word_16:
     wp1 .req r0
     wp2 .req r1
     b1  .req r2
@@ -465,7 +464,6 @@ miscmp_word_16:
     /* Restore registers and stack.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -490,7 +488,6 @@ miscmp_word_16:
     /* Restore registers and stack.  */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
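
Because memcpy_base.S is now assembled into several objects, it names its entry points through the MEMCPY_BASE and MEMCPY_BASE_ALIGNED macros, so each includer gets uniquely named copies with self-contained unwind information rather than code pasted into the middle of another function. A sketch of the include pattern this diff establishes; the __example_chk names are hypothetical, and the real wrappers above use their own prefixes:

// In a hypothetical wrapper __example_chk.S, after END(__example_chk):
#define MEMCPY_BASE         __example_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __example_chk_memcpy_base_aligned
#include "memcpy_base.S"    // Emits ENTRY(__example_chk_memcpy_base)...END() pairs.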