diff options
-rw-r--r-- | libc/arch-arm/cortex-a15/bionic/strcat.S | 292 | ||||
-rw-r--r-- | libc/arch-arm/cortex-a15/bionic/strlen.S | 60 |
2 files changed, 176 insertions, 176 deletions
diff --git a/libc/arch-arm/cortex-a15/bionic/strcat.S b/libc/arch-arm/cortex-a15/bionic/strcat.S index b95be94..b174aa9 100644 --- a/libc/arch-arm/cortex-a15/bionic/strcat.S +++ b/libc/arch-arm/cortex-a15/bionic/strcat.S @@ -70,7 +70,7 @@ .macro m_scan_byte ldrb r3, [r0] - cbz r3, strcat_r0_scan_done + cbz r3, .L_strcat_r0_scan_done add r0, #1 .endm // m_scan_byte @@ -84,10 +84,10 @@ ENTRY(strcat) // Quick check to see if src is empty. ldrb r2, [r1] pld [r1, #0] - cbnz r2, strcat_continue + cbnz r2, .L_strcat_continue bx lr -strcat_continue: +.L_strcat_continue: // To speed up really small dst strings, unroll checking the first 4 bytes. m_push m_scan_byte @@ -96,95 +96,95 @@ strcat_continue: m_scan_byte ands r3, r0, #7 - beq strcat_mainloop + beq .L_strcat_mainloop // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcat_align_to_32 + beq .L_strcat_align_to_32 ldrb r5, [r0] - cbz r5, strcat_r0_scan_done + cbz r5, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_32: - bcc strcat_align_to_64 +.L_strcat_align_to_32: + bcc .L_strcat_align_to_64 ldrb r2, [r0] - cbz r2, strcat_r0_scan_done + cbz r2, .L_strcat_r0_scan_done add r0, r0, #1 ldrb r4, [r0] - cbz r4, strcat_r0_scan_done + cbz r4, .L_strcat_r0_scan_done add r0, r0, #1 -strcat_align_to_64: +.L_strcat_align_to_64: tst r3, #4 - beq strcat_mainloop + beq .L_strcat_mainloop ldr r3, [r0], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_r0_scan_done: +.L_strcat_r0_scan_done: // For short copies, hard-code checking the first 8 bytes since this // new code doesn't win until after about 8 bytes. - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish - m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue - -strcpy_finish: + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish + m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue + +.L_strcpy_finish: m_pop -strcpy_continue: +.L_strcpy_continue: ands r3, r0, #7 - beq strcpy_check_src_align + beq .L_strcpy_check_src_align // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq strcpy_align_to_32 + beq .L_strcpy_align_to_32 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_32: - bcc strcpy_align_to_64 +.L_strcpy_align_to_32: + bcc .L_strcpy_align_to_64 ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete ldrb r2, [r1], #1 strb r2, [r0], #1 - cbz r2, strcpy_complete + cbz r2, .L_strcpy_complete -strcpy_align_to_64: +.L_strcpy_align_to_64: tst r3, #4 - beq strcpy_check_src_align + beq .L_strcpy_check_src_align ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register str r2, [r0], #4 -strcpy_check_src_align: +.L_strcpy_check_src_align: // At this point dst is aligned to a double word, check if src // is also aligned to a double word. ands r3, r1, #7 - bne strcpy_unaligned_copy + bne .L_strcpy_unaligned_copy .p2align 2 -strcpy_mainloop: +.L_strcpy_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -192,128 +192,128 @@ strcpy_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_mainloop + b .L_strcpy_mainloop -strcpy_complete: +.L_strcpy_complete: m_pop -strcpy_zero_in_first_register: +.L_strcpy_zero_in_first_register: lsls lr, ip, #17 - bne strcpy_copy1byte - bcs strcpy_copy2bytes + bne .L_strcpy_copy1byte + bcs .L_strcpy_copy2bytes lsls ip, ip, #1 - bne strcpy_copy3bytes + bne .L_strcpy_copy3bytes -strcpy_copy4bytes: +.L_strcpy_copy4bytes: // Copy 4 bytes to the destiniation. str r2, [r0] m_pop -strcpy_copy1byte: +.L_strcpy_copy1byte: strb r2, [r0] m_pop -strcpy_copy2bytes: +.L_strcpy_copy2bytes: strh r2, [r0] m_pop -strcpy_copy3bytes: +.L_strcpy_copy3bytes: strh r2, [r0], #2 lsr r2, #16 strb r2, [r0] m_pop -strcpy_zero_in_second_register: +.L_strcpy_zero_in_second_register: lsls lr, ip, #17 - bne strcpy_copy5bytes - bcs strcpy_copy6bytes + bne .L_strcpy_copy5bytes + bcs .L_strcpy_copy6bytes lsls ip, ip, #1 - bne strcpy_copy7bytes + bne .L_strcpy_copy7bytes // Copy 8 bytes to the destination. strd r2, r3, [r0] m_pop -strcpy_copy5bytes: +.L_strcpy_copy5bytes: str r2, [r0], #4 strb r3, [r0] m_pop -strcpy_copy6bytes: +.L_strcpy_copy6bytes: str r2, [r0], #4 strh r3, [r0] m_pop -strcpy_copy7bytes: +.L_strcpy_copy7bytes: str r2, [r0], #4 strh r3, [r0], #2 lsr r3, #16 strb r3, [r0] m_pop -strcpy_unaligned_copy: +.L_strcpy_unaligned_copy: // Dst is aligned to a double word, while src is at an unknown alignment. // There are 7 different versions of the unaligned copy code // to prevent overreading the src. The mainloop of every single version // will store 64 bits per loop. The difference is how much of src can // be read without potentially crossing a page boundary. tbb [pc, r3] -strcpy_unaligned_branchtable: +.L_strcpy_unaligned_branchtable: .byte 0 - .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2) - .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2) + .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2) .p2align 2 // Can read 7 bytes before possibly crossing a page. -strcpy_unalign7: +.L_strcpy_unalign7: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r3, [r1] - cbz r3, strcpy_unalign7_copy5bytes + cbz r3, .L_strcpy_unalign7_copy5bytes ldrb r4, [r1, #1] - cbz r4, strcpy_unalign7_copy6bytes + cbz r4, .L_strcpy_unalign7_copy6bytes ldrb r5, [r1, #2] - cbz r5, strcpy_unalign7_copy7bytes + cbz r5, .L_strcpy_unalign7_copy7bytes ldr r3, [r1], #4 pld [r1, #64] lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign7 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign7 -strcpy_unalign7_copy5bytes: +.L_strcpy_unalign7_copy5bytes: str r2, [r0], #4 strb r3, [r0] -strcpy_unalign_return: +.L_strcpy_unalign_return: m_pop -strcpy_unalign7_copy6bytes: +.L_strcpy_unalign7_copy6bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 m_pop -strcpy_unalign7_copy7bytes: +.L_strcpy_unalign7_copy7bytes: str r2, [r0], #4 strb r3, [r0], #1 strb r4, [r0], #1 @@ -322,41 +322,41 @@ strcpy_unalign7_copy7bytes: .p2align 2 // Can read 6 bytes before possibly crossing a page. -strcpy_unalign6: +.L_strcpy_unalign6: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldrb r5, [r1, #1] - cbz r5, strcpy_unalign_copy6bytes + cbz r5, .L_strcpy_unalign_copy6bytes ldr r3, [r1], #4 pld [r1, #64] tst r3, #0xff0000 - beq strcpy_copy7bytes + beq .L_strcpy_copy7bytes lsrs ip, r3, #24 strd r2, r3, [r0], #8 - beq strcpy_unalign_return - b strcpy_unalign6 + beq .L_strcpy_unalign_return + b .L_strcpy_unalign6 .p2align 2 // Can read 5 bytes before possibly crossing a page. -strcpy_unalign5: +.L_strcpy_unalign5: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldrb r4, [r1] - cbz r4, strcpy_unalign_copy5bytes + cbz r4, .L_strcpy_unalign_copy5bytes ldr r3, [r1], #4 @@ -365,17 +365,17 @@ strcpy_unalign5: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign5 + b .L_strcpy_unalign5 -strcpy_unalign_copy5bytes: +.L_strcpy_unalign_copy5bytes: str r2, [r0], #4 strb r4, [r0] m_pop -strcpy_unalign_copy6bytes: +.L_strcpy_unalign_copy6bytes: str r2, [r0], #4 strb r4, [r0], #1 strb r5, [r0] @@ -383,13 +383,13 @@ strcpy_unalign_copy6bytes: .p2align 2 // Can read 4 bytes before possibly crossing a page. -strcpy_unalign4: +.L_strcpy_unalign4: ldr r2, [r1], #4 sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register ldr r3, [r1], #4 pld [r1, #64] @@ -397,20 +397,20 @@ strcpy_unalign4: sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign4 + b .L_strcpy_unalign4 .p2align 2 // Can read 3 bytes before possibly crossing a page. -strcpy_unalign3: +.L_strcpy_unalign3: ldrb r2, [r1] - cbz r2, strcpy_unalign3_copy1byte + cbz r2, .L_strcpy_unalign3_copy1byte ldrb r3, [r1, #1] - cbz r3, strcpy_unalign3_copy2bytes + cbz r3, .L_strcpy_unalign3_copy2bytes ldrb r4, [r1, #2] - cbz r4, strcpy_unalign3_copy3bytes + cbz r4, .L_strcpy_unalign3_copy3bytes ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -418,26 +418,26 @@ strcpy_unalign3: pld [r1, #64] lsrs lr, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign3 + b .L_strcpy_unalign3 -strcpy_unalign3_copy1byte: +.L_strcpy_unalign3_copy1byte: strb r2, [r0] m_pop -strcpy_unalign3_copy2bytes: +.L_strcpy_unalign3_copy2bytes: strb r2, [r0], #1 strb r3, [r0] m_pop -strcpy_unalign3_copy3bytes: +.L_strcpy_unalign3_copy3bytes: strb r2, [r0], #1 strb r3, [r0], #1 strb r4, [r0] @@ -445,34 +445,34 @@ strcpy_unalign3_copy3bytes: .p2align 2 // Can read 2 bytes before possibly crossing a page. -strcpy_unalign2: +.L_strcpy_unalign2: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldrb r4, [r1, #1] - cbz r4, strcpy_unalign_copy2bytes + cbz r4, .L_strcpy_unalign_copy2bytes ldr r2, [r1], #4 ldr r3, [r1], #4 pld [r1, #64] tst r2, #0xff0000 - beq strcpy_copy3bytes + beq .L_strcpy_copy3bytes lsrs ip, r2, #24 - beq strcpy_copy4bytes + beq .L_strcpy_copy4bytes sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign2 + b .L_strcpy_unalign2 .p2align 2 // Can read 1 byte before possibly crossing a page. -strcpy_unalign1: +.L_strcpy_unalign1: ldrb r2, [r1] - cbz r2, strcpy_unalign_copy1byte + cbz r2, .L_strcpy_unalign_copy1byte ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -482,27 +482,27 @@ strcpy_unalign1: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcpy_zero_in_first_register + bne .L_strcpy_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcpy_zero_in_second_register + bne .L_strcpy_zero_in_second_register strd r2, r3, [r0], #8 - b strcpy_unalign1 + b .L_strcpy_unalign1 -strcpy_unalign_copy1byte: +.L_strcpy_unalign_copy1byte: strb r2, [r0] m_pop -strcpy_unalign_copy2bytes: +.L_strcpy_unalign_copy2bytes: strb r2, [r0], #1 strb r4, [r0] m_pop .p2align 2 -strcat_mainloop: +.L_strcat_mainloop: ldrd r2, r3, [r0], #8 pld [r0, #64] @@ -510,59 +510,59 @@ strcat_mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne strcat_zero_in_first_register + bne .L_strcat_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne strcat_zero_in_second_register - b strcat_mainloop + bne .L_strcat_zero_in_second_register + b .L_strcat_mainloop -strcat_zero_in_first_register: +.L_strcat_zero_in_first_register: // Prefetch the src now, it's going to be used soon. pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub8 - bcs strcat_sub7 + bne .L_strcat_sub8 + bcs .L_strcat_sub7 lsls ip, ip, #1 - bne strcat_sub6 + bne .L_strcat_sub6 sub r0, r0, #5 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub8: +.L_strcat_sub8: sub r0, r0, #8 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub7: +.L_strcat_sub7: sub r0, r0, #7 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub6: +.L_strcat_sub6: sub r0, r0, #6 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_zero_in_second_register: +.L_strcat_zero_in_second_register: // Prefetch the src now, it's going to be used soon. pld [r1, #0] lsls lr, ip, #17 - bne strcat_sub4 - bcs strcat_sub3 + bne .L_strcat_sub4 + bcs .L_strcat_sub3 lsls ip, ip, #1 - bne strcat_sub2 + bne .L_strcat_sub2 sub r0, r0, #1 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub4: +.L_strcat_sub4: sub r0, r0, #4 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub3: +.L_strcat_sub3: sub r0, r0, #3 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done -strcat_sub2: +.L_strcat_sub2: sub r0, r0, #2 - b strcat_r0_scan_done + b .L_strcat_r0_scan_done END(strcat) diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S index 9a0ce62..4fd6284 100644 --- a/libc/arch-arm/cortex-a15/bionic/strlen.S +++ b/libc/arch-arm/cortex-a15/bionic/strlen.S @@ -65,38 +65,38 @@ ENTRY(strlen) mov r1, r0 ands r3, r0, #7 - beq mainloop + beq .L_mainloop // Align to a double word (64 bits). rsb r3, r3, #8 lsls ip, r3, #31 - beq align_to_32 + beq .L_align_to_32 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_32: - bcc align_to_64 +.L_align_to_32: + bcc .L_align_to_64 ands ip, r3, #2 - beq align_to_64 + beq .L_align_to_64 ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return ldrb r2, [r1], #1 - cbz r2, update_count_and_return + cbz r2, .L_update_count_and_return -align_to_64: +.L_align_to_64: tst r3, #4 - beq mainloop + beq .L_mainloop ldr r3, [r1], #4 sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register + bne .L_zero_in_second_register .p2align 2 -mainloop: +.L_mainloop: ldrd r2, r3, [r1], #8 pld [r1, #64] @@ -104,62 +104,62 @@ mainloop: sub ip, r2, #0x01010101 bic ip, ip, r2 ands ip, ip, #0x80808080 - bne zero_in_first_register + bne .L_zero_in_first_register sub ip, r3, #0x01010101 bic ip, ip, r3 ands ip, ip, #0x80808080 - bne zero_in_second_register - b mainloop + bne .L_zero_in_second_register + b .L_mainloop -update_count_and_return: +.L_update_count_and_return: sub r0, r1, r0 sub r0, r0, #1 bx lr -zero_in_first_register: +.L_zero_in_first_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub8_and_return - bcs sub7_and_return + bne .L_sub8_and_return + bcs .L_sub7_and_return lsls ip, ip, #1 - bne sub6_and_return + bne .L_sub6_and_return sub r0, r0, #5 bx lr -sub8_and_return: +.L_sub8_and_return: sub r0, r0, #8 bx lr -sub7_and_return: +.L_sub7_and_return: sub r0, r0, #7 bx lr -sub6_and_return: +.L_sub6_and_return: sub r0, r0, #6 bx lr -zero_in_second_register: +.L_zero_in_second_register: sub r0, r1, r0 lsls r3, ip, #17 - bne sub4_and_return - bcs sub3_and_return + bne .L_sub4_and_return + bcs .L_sub3_and_return lsls ip, ip, #1 - bne sub2_and_return + bne .L_sub2_and_return sub r0, r0, #1 bx lr -sub4_and_return: +.L_sub4_and_return: sub r0, r0, #4 bx lr -sub3_and_return: +.L_sub3_and_return: sub r0, r0, #3 bx lr -sub2_and_return: +.L_sub2_and_return: sub r0, r0, #2 bx lr END(strlen) |