summaryrefslogtreecommitdiffstats
path: root/libc/arch-arm
diff options
context:
space:
mode:
Diffstat (limited to 'libc/arch-arm')
-rw-r--r--libc/arch-arm/arm.mk3
-rw-r--r--libc/arch-arm/cortex-a15/bionic/__strcat_chk.S192
-rw-r--r--libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S212
-rw-r--r--libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S156
-rw-r--r--libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S173
-rw-r--r--libc/arch-arm/cortex-a15/bionic/memcpy.S79
-rw-r--r--libc/arch-arm/cortex-a15/bionic/memcpy_base.S42
-rw-r--r--libc/arch-arm/cortex-a15/bionic/memcpy_common.S103
-rw-r--r--libc/arch-arm/cortex-a15/bionic/strcat.S311
-rw-r--r--libc/arch-arm/cortex-a15/bionic/string_copy.S21
-rw-r--r--libc/arch-arm/cortex-a15/bionic/strlen.S60
-rw-r--r--libc/arch-arm/cortex-a15/cortex-a15.mk1
-rw-r--r--libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk22
-rw-r--r--libc/arch-arm/cortex-a53/bionic/__strcat_chk.S32
-rw-r--r--libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S32
-rw-r--r--libc/arch-arm/cortex-a53/bionic/memcpy.S32
-rw-r--r--libc/arch-arm/cortex-a53/bionic/memcpy_base.S143
-rw-r--r--libc/arch-arm/cortex-a53/cortex-a53.mk22
-rw-r--r--libc/arch-arm/cortex-a7/bionic/memset.S180
-rw-r--r--libc/arch-arm/cortex-a7/cortex-a7.mk20
-rw-r--r--libc/arch-arm/cortex-a9/bionic/memcpy_base.S3
-rw-r--r--libc/arch-arm/cortex-a9/bionic/memset.S33
-rw-r--r--libc/arch-arm/cortex-a9/bionic/strcat.S261
-rw-r--r--libc/arch-arm/cortex-a9/bionic/string_copy.S21
-rw-r--r--libc/arch-arm/cortex-a9/cortex-a9.mk1
-rw-r--r--libc/arch-arm/denver/denver.mk1
-rw-r--r--libc/arch-arm/generic/bionic/memchr.S155
-rw-r--r--libc/arch-arm/generic/bionic/memcmp.S3
-rw-r--r--libc/arch-arm/generic/bionic/memcpy.S6
-rw-r--r--libc/arch-arm/generic/bionic/memset.S6
-rw-r--r--libc/arch-arm/generic/generic.mk1
-rw-r--r--libc/arch-arm/krait/bionic/__strcat_chk.S19
-rw-r--r--libc/arch-arm/krait/bionic/__strcpy_chk.S15
-rw-r--r--libc/arch-arm/krait/bionic/memcpy.S17
-rw-r--r--libc/arch-arm/krait/bionic/memcpy_base.S314
-rw-r--r--libc/arch-arm/krait/bionic/memmove.S219
-rw-r--r--libc/arch-arm/krait/bionic/memset.S20
-rw-r--r--libc/arch-arm/krait/krait.mk16
-rw-r--r--libc/arch-arm/scorpion/scorpion.mk18
39 files changed, 2010 insertions, 955 deletions
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index d72a160..c2b80c5 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -20,7 +20,6 @@ libc_freebsd_src_files_arm += \
upstream-freebsd/lib/libc/string/wmemmove.c \
libc_openbsd_src_files_arm += \
- upstream-openbsd/lib/libc/string/memchr.c \
upstream-openbsd/lib/libc/string/memrchr.c \
upstream-openbsd/lib/libc/string/stpncpy.c \
upstream-openbsd/lib/libc/string/strlcat.c \
@@ -52,7 +51,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),)
endif
cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk
ifeq ($(wildcard $(cpu_variant_mk)),)
-$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
+$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, scorpion, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
endif
include $(cpu_variant_mk)
libc_common_additional_dependencies += $(cpu_variant_mk)
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
index a2e9c22..3692f04 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,191 +26,7 @@
* SUCH DAMAGE.
*/
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .syntax unified
-
- .thumb
- .thumb_func
-
-// Get the length of src string, then get the source of the dst string.
-// Check that the two lengths together don't exceed the threshold, then
-// do a memcpy of the data.
-ENTRY(__strcat_chk)
- pld [r0, #0]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
- push {r4, r5}
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r5, 4
-
- mov lr, r2
-
- // Save the dst register to r5
- mov r5, r0
-
- // Zero out r4
- eor r4, r4, r4
-
- // r1 contains the address of the string to count.
-.L_strlen_start:
- mov r0, r1
- ands r3, r1, #7
- beq .L_mainloop
-
- // Align to a double word (64 bits).
- rsb r3, r3, #8
- lsls ip, r3, #31
- beq .L_align_to_32
-
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_32:
- bcc .L_align_to_64
- ands ip, r3, #2
- beq .L_align_to_64
-
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
- ldrb r2, [r1], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_64:
- tst r3, #4
- beq .L_mainloop
- ldr r3, [r1], #4
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
-
- .p2align 2
-.L_mainloop:
- ldrd r2, r3, [r1], #8
-
- pld [r1, #64]
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .L_zero_in_first_register
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
- b .L_mainloop
-
-.L_update_count_and_finish:
- sub r3, r1, r0
- sub r3, r3, #1
- b .L_finish
-
-.L_zero_in_first_register:
- sub r3, r1, r0
- lsls r2, ip, #17
- bne .L_sub8_and_finish
- bcs .L_sub7_and_finish
- lsls ip, ip, #1
- bne .L_sub6_and_finish
-
- sub r3, r3, #5
- b .L_finish
-
-.L_sub8_and_finish:
- sub r3, r3, #8
- b .L_finish
-
-.L_sub7_and_finish:
- sub r3, r3, #7
- b .L_finish
-
-.L_sub6_and_finish:
- sub r3, r3, #6
- b .L_finish
-
-.L_zero_in_second_register:
- sub r3, r1, r0
- lsls r2, ip, #17
- bne .L_sub4_and_finish
- bcs .L_sub3_and_finish
- lsls ip, ip, #1
- bne .L_sub2_and_finish
-
- sub r3, r3, #1
- b .L_finish
-
-.L_sub4_and_finish:
- sub r3, r3, #4
- b .L_finish
-
-.L_sub3_and_finish:
- sub r3, r3, #3
- b .L_finish
-
-.L_sub2_and_finish:
- sub r3, r3, #2
-
-.L_finish:
- cmp r4, #0
- bne .L_strlen_done
-
- // Time to get the dst string length.
- mov r1, r5
-
- // Save the original source address to r5.
- mov r5, r0
-
- // Save the current length (adding 1 for the terminator).
- add r4, r3, #1
- b .L_strlen_start
-
- // r0 holds the pointer to the dst string.
- // r3 holds the dst string length.
- // r4 holds the src string length + 1.
-.L_strlen_done:
- add r2, r3, r4
- cmp r2, lr
- bhi __strcat_chk_failed
-
- // Set up the registers for the memcpy code.
- mov r1, r5
- pld [r1, #64]
- mov r2, r4
- add r0, r0, r3
- pop {r4, r5}
-END(__strcat_chk)
-
-#define MEMCPY_BASE __strcat_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
-
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__strcat_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r5, 4
-
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+4)
-END(__strcat_chk_failed)
-
- .data
-error_string:
- .string "strcat: prevented write past end of buffer"
+#include "__strcat_chk_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S
new file mode 100644
index 0000000..de66967
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk_common.S
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .syntax unified
+
+ .thumb
+ .thumb_func
+
+// Get the length of src string, then get the source of the dst string.
+// Check that the two lengths together don't exceed the threshold, then
+// do a memcpy of the data.
+ENTRY(__strcat_chk)
+ pld [r0, #0]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+ push {r4, r5}
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+
+ mov lr, r2
+
+ // Save the dst register to r5
+ mov r5, r0
+
+ // Zero out r4
+ eor r4, r4, r4
+
+ // r1 contains the address of the string to count.
+.L_strlen_start:
+ mov r0, r1
+ ands r3, r1, #7
+ beq .L_mainloop
+
+ // Align to a double word (64 bits).
+ rsb r3, r3, #8
+ lsls ip, r3, #31
+ beq .L_align_to_32
+
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_32:
+ bcc .L_align_to_64
+ ands ip, r3, #2
+ beq .L_align_to_64
+
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+ ldrb r2, [r1], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_64:
+ tst r3, #4
+ beq .L_mainloop
+ ldr r3, [r1], #4
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+
+ .p2align 2
+.L_mainloop:
+ ldrd r2, r3, [r1], #8
+
+ pld [r1, #64]
+
+ sub ip, r2, #0x01010101
+ bic ip, ip, r2
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_first_register
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+ b .L_mainloop
+
+.L_update_count_and_finish:
+ sub r3, r1, r0
+ sub r3, r3, #1
+ b .L_finish
+
+.L_zero_in_first_register:
+ sub r3, r1, r0
+ lsls r2, ip, #17
+ bne .L_sub8_and_finish
+ bcs .L_sub7_and_finish
+ lsls ip, ip, #1
+ bne .L_sub6_and_finish
+
+ sub r3, r3, #5
+ b .L_finish
+
+.L_sub8_and_finish:
+ sub r3, r3, #8
+ b .L_finish
+
+.L_sub7_and_finish:
+ sub r3, r3, #7
+ b .L_finish
+
+.L_sub6_and_finish:
+ sub r3, r3, #6
+ b .L_finish
+
+.L_zero_in_second_register:
+ sub r3, r1, r0
+ lsls r2, ip, #17
+ bne .L_sub4_and_finish
+ bcs .L_sub3_and_finish
+ lsls ip, ip, #1
+ bne .L_sub2_and_finish
+
+ sub r3, r3, #1
+ b .L_finish
+
+.L_sub4_and_finish:
+ sub r3, r3, #4
+ b .L_finish
+
+.L_sub3_and_finish:
+ sub r3, r3, #3
+ b .L_finish
+
+.L_sub2_and_finish:
+ sub r3, r3, #2
+
+.L_finish:
+ cmp r4, #0
+ bne .L_strlen_done
+
+ // Time to get the dst string length.
+ mov r1, r5
+
+ // Save the original source address to r5.
+ mov r5, r0
+
+ // Save the current length (adding 1 for the terminator).
+ add r4, r3, #1
+ b .L_strlen_start
+
+ // r0 holds the pointer to the dst string.
+ // r3 holds the dst string length.
+ // r4 holds the src string length + 1.
+.L_strlen_done:
+ add r2, r3, r4
+ cmp r2, lr
+ bhi .L_strcat_chk_failed
+
+ // Set up the registers for the memcpy code.
+ mov r1, r5
+ pld [r1, #64]
+ mov r2, r4
+ add r0, r0, r3
+ pop {r4, r5}
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
+
+#include MEMCPY_BASE
+
+ // Undo the above cfi directives
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+.L_strcat_chk_failed:
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+4)
+END(__strcat_chk)
+
+ .data
+error_string:
+ .string "strcat: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
index db76686..d8cb3d9 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,155 +26,7 @@
* SUCH DAMAGE.
*/
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .syntax unified
-
- .thumb
- .thumb_func
-
-// Get the length of the source string first, then do a memcpy of the data
-// instead of a strcpy.
-ENTRY(__strcpy_chk)
- pld [r0, #0]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- mov lr, r2
- mov r0, r1
-
- ands r3, r1, #7
- beq .L_mainloop
-
- // Align to a double word (64 bits).
- rsb r3, r3, #8
- lsls ip, r3, #31
- beq .L_align_to_32
-
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_32:
- bcc .L_align_to_64
- ands ip, r3, #2
- beq .L_align_to_64
-
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
- ldrb r2, [r0], #1
- cbz r2, .L_update_count_and_finish
-
-.L_align_to_64:
- tst r3, #4
- beq .L_mainloop
- ldr r3, [r0], #4
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
-
- .p2align 2
-.L_mainloop:
- ldrd r2, r3, [r0], #8
-
- pld [r0, #64]
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .L_zero_in_first_register
-
- sub ip, r3, #0x01010101
- bic ip, ip, r3
- ands ip, ip, #0x80808080
- bne .L_zero_in_second_register
- b .L_mainloop
-
-.L_update_count_and_finish:
- sub r3, r0, r1
- sub r3, r3, #1
- b .L_check_size
-
-.L_zero_in_first_register:
- sub r3, r0, r1
- lsls r2, ip, #17
- bne .L_sub8_and_finish
- bcs .L_sub7_and_finish
- lsls ip, ip, #1
- bne .L_sub6_and_finish
-
- sub r3, r3, #5
- b .L_check_size
-
-.L_sub8_and_finish:
- sub r3, r3, #8
- b .L_check_size
-
-.L_sub7_and_finish:
- sub r3, r3, #7
- b .L_check_size
-
-.L_sub6_and_finish:
- sub r3, r3, #6
- b .L_check_size
-
-.L_zero_in_second_register:
- sub r3, r0, r1
- lsls r2, ip, #17
- bne .L_sub4_and_finish
- bcs .L_sub3_and_finish
- lsls ip, ip, #1
- bne .L_sub2_and_finish
-
- sub r3, r3, #1
- b .L_check_size
-
-.L_sub4_and_finish:
- sub r3, r3, #4
- b .L_check_size
-
-.L_sub3_and_finish:
- sub r3, r3, #3
- b .L_check_size
-
-.L_sub2_and_finish:
- sub r3, r3, #2
-
-.L_check_size:
- pld [r1, #0]
- pld [r1, #64]
- ldr r0, [sp]
- cmp r3, lr
- bhs __strcpy_chk_failed
-
- // Add 1 for copy length to get the string terminator.
- add r2, r3, #1
-END(__strcpy_chk)
-
-#define MEMCPY_BASE __strcpy_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__strcpy_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+4)
-END(__strcpy_chk_failed)
-
- .data
-error_string:
- .string "strcpy: prevented write past end of buffer"
+#include "__strcpy_chk_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S
new file mode 100644
index 0000000..69ebcb4
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk_common.S
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .syntax unified
+
+ .thumb
+ .thumb_func
+
+// Get the length of the source string first, then do a memcpy of the data
+// instead of a strcpy.
+ENTRY(__strcpy_chk)
+ pld [r0, #0]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+
+ mov lr, r2
+ mov r0, r1
+
+ ands r3, r1, #7
+ beq .L_mainloop
+
+ // Align to a double word (64 bits).
+ rsb r3, r3, #8
+ lsls ip, r3, #31
+ beq .L_align_to_32
+
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_32:
+ bcc .L_align_to_64
+ ands ip, r3, #2
+ beq .L_align_to_64
+
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+ ldrb r2, [r0], #1
+ cbz r2, .L_update_count_and_finish
+
+.L_align_to_64:
+ tst r3, #4
+ beq .L_mainloop
+ ldr r3, [r0], #4
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+
+ .p2align 2
+.L_mainloop:
+ ldrd r2, r3, [r0], #8
+
+ pld [r0, #64]
+
+ sub ip, r2, #0x01010101
+ bic ip, ip, r2
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_first_register
+
+ sub ip, r3, #0x01010101
+ bic ip, ip, r3
+ ands ip, ip, #0x80808080
+ bne .L_zero_in_second_register
+ b .L_mainloop
+
+.L_update_count_and_finish:
+ sub r3, r0, r1
+ sub r3, r3, #1
+ b .L_check_size
+
+.L_zero_in_first_register:
+ sub r3, r0, r1
+ lsls r2, ip, #17
+ bne .L_sub8_and_finish
+ bcs .L_sub7_and_finish
+ lsls ip, ip, #1
+ bne .L_sub6_and_finish
+
+ sub r3, r3, #5
+ b .L_check_size
+
+.L_sub8_and_finish:
+ sub r3, r3, #8
+ b .L_check_size
+
+.L_sub7_and_finish:
+ sub r3, r3, #7
+ b .L_check_size
+
+.L_sub6_and_finish:
+ sub r3, r3, #6
+ b .L_check_size
+
+.L_zero_in_second_register:
+ sub r3, r0, r1
+ lsls r2, ip, #17
+ bne .L_sub4_and_finish
+ bcs .L_sub3_and_finish
+ lsls ip, ip, #1
+ bne .L_sub2_and_finish
+
+ sub r3, r3, #1
+ b .L_check_size
+
+.L_sub4_and_finish:
+ sub r3, r3, #4
+ b .L_check_size
+
+.L_sub3_and_finish:
+ sub r3, r3, #3
+ b .L_check_size
+
+.L_sub2_and_finish:
+ sub r3, r3, #2
+
+.L_check_size:
+ pld [r1, #0]
+ pld [r1, #64]
+ ldr r0, [sp]
+ cmp r3, lr
+ bhs .L_strcpy_chk_failed
+
+ // Add 1 for copy length to get the string terminator.
+ add r2, r3, #1
+
+#include MEMCPY_BASE
+
+.L_strcpy_chk_failed:
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+4)
+END(__strcpy_chk)
+
+ .data
+error_string:
+ .string "strcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index 410b663..537f3de 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,79 +25,8 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Prototype: void *memcpy (void *dst, const void *src, size_t count).
-
-#include <private/bionic_asm.h>
-#include <private/libc_events.h>
-
- .text
- .syntax unified
- .fpu neon
-
-ENTRY(__memcpy_chk)
- cmp r2, r3
- bhi __memcpy_chk_fail
-
- // Fall through to memcpy...
-END(__memcpy_chk)
-
-ENTRY(memcpy)
- pld [r1, #64]
- push {r0, lr}
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-END(memcpy)
-
-#define MEMCPY_BASE __memcpy_base
-#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__memcpy_chk_fail)
- // Preserve lr for backtrace.
- push {lr}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset lr, 0
- ldr r0, error_message
- ldr r1, error_code
-1:
- add r0, pc
- bl __fortify_chk_fail
-error_code:
- .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+8)
-END(__memcpy_chk_fail)
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "memcpy_base.S"
- .data
-error_string:
- .string "memcpy: prevented write past end of buffer"
+#include "memcpy_common.S"
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
index 2a73852..aac737d 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
@@ -53,11 +53,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-ENTRY_PRIVATE(MEMCPY_BASE)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
+.L_memcpy_base:
// Assumes that n >= 0, and dst, src are valid pointers.
// For any sizes less than 832 use the neon code that doesn't
// care about the src alignment. This avoids any checks
@@ -168,12 +164,6 @@ ENTRY_PRIVATE(MEMCPY_BASE)
eor r3, r0, r1
ands r3, r3, #0x3
bne .L_copy_unknown_alignment
-END(MEMCPY_BASE)
-
-ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
// To try and improve performance, stack layout changed,
// i.e., not keeping the stack looking like users expect
@@ -185,7 +175,7 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
strd r6, r7, [sp, #-8]!
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r6, 0
- .cfi_rel_offset r7, 0
+ .cfi_rel_offset r7, 4
strd r8, r9, [sp, #-8]!
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r8, 0
@@ -291,10 +281,28 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
// Restore registers: optimized pop {r0, pc}
ldrd r8, r9, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r8
+ .cfi_restore r9
ldrd r6, r7, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r6
+ .cfi_restore r7
ldrd r4, r5, [sp], #8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
pop {r0, pc}
+ // Put the cfi directives back for the below instructions.
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+ .cfi_rel_offset r6, 8
+ .cfi_rel_offset r7, 12
+ .cfi_rel_offset r8, 16
+ .cfi_rel_offset r9, 20
+
.L_dst_not_word_aligned:
// Align dst to word.
rsb ip, ip, #4
@@ -315,4 +323,12 @@ ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
// Src is guaranteed to be at least word aligned by this point.
b .L_word_aligned
-END(MEMCPY_BASE_ALIGNED)
+
+ // Undo any cfi directives from above.
+ .cfi_adjust_cfa_offset -24
+ .cfi_restore r4
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r9
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_common.S b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S
new file mode 100644
index 0000000..464fb46
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_common.S
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .text
+ .syntax unified
+ .fpu neon
+
+ENTRY(__memcpy_chk)
+ cmp r2, r3
+ bhi .L_memcpy_chk_fail
+
+ // Fall through to memcpy...
+END(__memcpy_chk)
+
+// Prototype: void *memcpy (void *dst, const void *src, size_t count).
+ENTRY(memcpy)
+ pld [r1, #64]
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+
+#include MEMCPY_BASE
+
+ // Undo the cfi instructions from above.
+ .cfi_def_cfa_offset 0
+ .cfi_restore r0
+ .cfi_restore lr
+.L_memcpy_chk_fail:
+ // Preserve lr for backtrace.
+ push {lr}
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset lr, 0
+
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+8)
+END(memcpy)
+
+ .data
+error_string:
+ .string "memcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a15/bionic/strcat.S b/libc/arch-arm/cortex-a15/bionic/strcat.S
index b95be94..157cc9f 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcat.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcat.S
@@ -70,7 +70,7 @@
.macro m_scan_byte
ldrb r3, [r0]
- cbz r3, strcat_r0_scan_done
+ cbz r3, .L_strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
@@ -84,10 +84,10 @@ ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
- cbnz r2, strcat_continue
+ cbnz r2, .L_strcat_continue
bx lr
-strcat_continue:
+.L_strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
@@ -96,95 +96,102 @@ strcat_continue:
m_scan_byte
ands r3, r0, #7
- beq strcat_mainloop
+ beq .L_strcat_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcat_align_to_32
+ beq .L_strcat_align_to_32
ldrb r5, [r0]
- cbz r5, strcat_r0_scan_done
+ cbz r5, .L_strcat_r0_scan_done
add r0, r0, #1
-strcat_align_to_32:
- bcc strcat_align_to_64
+.L_strcat_align_to_32:
+ bcc .L_strcat_align_to_64
ldrb r2, [r0]
- cbz r2, strcat_r0_scan_done
+ cbz r2, .L_strcat_r0_scan_done
add r0, r0, #1
ldrb r4, [r0]
- cbz r4, strcat_r0_scan_done
+ cbz r4, .L_strcat_r0_scan_done
add r0, r0, #1
-strcat_align_to_64:
+.L_strcat_align_to_64:
tst r3, #4
- beq strcat_mainloop
+ beq .L_strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .L_strcat_zero_in_second_register
+ b .L_strcat_mainloop
-strcat_r0_scan_done:
+.L_strcat_r0_scan_done:
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
-
-strcpy_finish:
+ m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
+ m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
+
+.L_strcpy_finish:
m_pop
-strcpy_continue:
+.L_strcpy_continue:
ands r3, r0, #7
- beq strcpy_check_src_align
+ beq .L_strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcpy_align_to_32
+ beq .L_strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
-strcpy_align_to_32:
- bcc strcpy_align_to_64
+.L_strcpy_align_to_32:
+ bcc .L_strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .L_strcpy_complete
-strcpy_align_to_64:
+.L_strcpy_align_to_64:
tst r3, #4
- beq strcpy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
- str r2, [r0], #4
+ beq .L_strcpy_check_src_align
+ // Read one byte at a time since we don't know the src alignment
+ // and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .L_strcpy_complete
-strcpy_check_src_align:
+.L_strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
- bne strcpy_unaligned_copy
+ bne .L_strcpy_unaligned_copy
.p2align 2
-strcpy_mainloop:
+.L_strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@@ -192,128 +199,128 @@ strcpy_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_mainloop
+ b .L_strcpy_mainloop
-strcpy_complete:
+.L_strcpy_complete:
m_pop
-strcpy_zero_in_first_register:
+.L_strcpy_zero_in_first_register:
lsls lr, ip, #17
- bne strcpy_copy1byte
- bcs strcpy_copy2bytes
+ bne .L_strcpy_copy1byte
+ bcs .L_strcpy_copy2bytes
lsls ip, ip, #1
- bne strcpy_copy3bytes
+ bne .L_strcpy_copy3bytes
-strcpy_copy4bytes:
+.L_strcpy_copy4bytes:
// Copy 4 bytes to the destiniation.
str r2, [r0]
m_pop
-strcpy_copy1byte:
+.L_strcpy_copy1byte:
strb r2, [r0]
m_pop
-strcpy_copy2bytes:
+.L_strcpy_copy2bytes:
strh r2, [r0]
m_pop
-strcpy_copy3bytes:
+.L_strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
-strcpy_zero_in_second_register:
+.L_strcpy_zero_in_second_register:
lsls lr, ip, #17
- bne strcpy_copy5bytes
- bcs strcpy_copy6bytes
+ bne .L_strcpy_copy5bytes
+ bcs .L_strcpy_copy6bytes
lsls ip, ip, #1
- bne strcpy_copy7bytes
+ bne .L_strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
-strcpy_copy5bytes:
+.L_strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
-strcpy_copy6bytes:
+.L_strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
-strcpy_copy7bytes:
+.L_strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
-strcpy_unaligned_copy:
+.L_strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
-strcpy_unaligned_branchtable:
+.L_strcpy_unaligned_branchtable:
.byte 0
- .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
+ .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.L_strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r3, [r1]
- cbz r3, strcpy_unalign7_copy5bytes
+ cbz r3, .L_strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign7_copy6bytes
+ cbz r4, .L_strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
- cbz r5, strcpy_unalign7_copy7bytes
+ cbz r5, .L_strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
- beq strcpy_unalign_return
- b strcpy_unalign7
+ beq .L_strcpy_unalign_return
+ b .L_strcpy_unalign7
-strcpy_unalign7_copy5bytes:
+.L_strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
-strcpy_unalign_return:
+.L_strcpy_unalign_return:
m_pop
-strcpy_unalign7_copy6bytes:
+.L_strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
-strcpy_unalign7_copy7bytes:
+.L_strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
@@ -322,41 +329,41 @@ strcpy_unalign7_copy7bytes:
.p2align 2
// Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.L_strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .L_strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
- cbz r5, strcpy_unalign_copy6bytes
+ cbz r5, .L_strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
- beq strcpy_copy7bytes
+ beq .L_strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
- beq strcpy_unalign_return
- b strcpy_unalign6
+ beq .L_strcpy_unalign_return
+ b .L_strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.L_strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .L_strcpy_unalign_copy5bytes
ldr r3, [r1], #4
@@ -365,17 +372,17 @@ strcpy_unalign5:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign5
+ b .L_strcpy_unalign5
-strcpy_unalign_copy5bytes:
+.L_strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
-strcpy_unalign_copy6bytes:
+.L_strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
@@ -383,13 +390,13 @@ strcpy_unalign_copy6bytes:
.p2align 2
// Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.L_strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
@@ -397,20 +404,20 @@ strcpy_unalign4:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign4
+ b .L_strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.L_strcpy_unalign3:
ldrb r2, [r1]
- cbz r2, strcpy_unalign3_copy1byte
+ cbz r2, .L_strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign3_copy2bytes
+ cbz r3, .L_strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
- cbz r4, strcpy_unalign3_copy3bytes
+ cbz r4, .L_strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -418,26 +425,26 @@ strcpy_unalign3:
pld [r1, #64]
lsrs lr, r2, #24
- beq strcpy_copy4bytes
+ beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign3
+ b .L_strcpy_unalign3
-strcpy_unalign3_copy1byte:
+.L_strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
-strcpy_unalign3_copy2bytes:
+.L_strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
-strcpy_unalign3_copy3bytes:
+.L_strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
@@ -445,34 +452,34 @@ strcpy_unalign3_copy3bytes:
.p2align 2
// Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.L_strcpy_unalign2:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .L_strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign_copy2bytes
+ cbz r4, .L_strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
- beq strcpy_copy3bytes
+ beq .L_strcpy_copy3bytes
lsrs ip, r2, #24
- beq strcpy_copy4bytes
+ beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign2
+ b .L_strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.L_strcpy_unalign1:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .L_strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -482,27 +489,27 @@ strcpy_unalign1:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
- b strcpy_unalign1
+ b .L_strcpy_unalign1
-strcpy_unalign_copy1byte:
+.L_strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
-strcpy_unalign_copy2bytes:
+.L_strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
-strcat_mainloop:
+.L_strcat_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
@@ -510,59 +517,59 @@ strcat_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcat_zero_in_first_register
+ bne .L_strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .L_strcat_zero_in_second_register
+ b .L_strcat_mainloop
-strcat_zero_in_first_register:
+.L_strcat_zero_in_first_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
- bne strcat_sub8
- bcs strcat_sub7
+ bne .L_strcat_sub8
+ bcs .L_strcat_sub7
lsls ip, ip, #1
- bne strcat_sub6
+ bne .L_strcat_sub6
sub r0, r0, #5
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub8:
+.L_strcat_sub8:
sub r0, r0, #8
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub7:
+.L_strcat_sub7:
sub r0, r0, #7
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub6:
+.L_strcat_sub6:
sub r0, r0, #6
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_zero_in_second_register:
+.L_strcat_zero_in_second_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
- bne strcat_sub4
- bcs strcat_sub3
+ bne .L_strcat_sub4
+ bcs .L_strcat_sub3
lsls ip, ip, #1
- bne strcat_sub2
+ bne .L_strcat_sub2
sub r0, r0, #1
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub4:
+.L_strcat_sub4:
sub r0, r0, #4
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub3:
+.L_strcat_sub3:
sub r0, r0, #3
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
-strcat_sub2:
+.L_strcat_sub2:
sub r0, r0, #2
- b strcat_r0_scan_done
+ b .L_strcat_r0_scan_done
END(strcat)
diff --git a/libc/arch-arm/cortex-a15/bionic/string_copy.S b/libc/arch-arm/cortex-a15/bionic/string_copy.S
index 20f0e91..92d1c98 100644
--- a/libc/arch-arm/cortex-a15/bionic/string_copy.S
+++ b/libc/arch-arm/cortex-a15/bionic/string_copy.S
@@ -149,13 +149,20 @@ ENTRY(strcpy)
.Lstringcopy_align_to_64:
tst r3, #4
beq .Lstringcopy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .Lstringcopy_zero_in_first_register
- str r2, [r0], #4
+ // Read one byte at a time since we don't have any idea about the alignment
+ // of the source and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
.Lstringcopy_check_src_align:
// At this point dst is aligned to a double word, check if src
diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S
index 9a0ce62..4fd6284 100644
--- a/libc/arch-arm/cortex-a15/bionic/strlen.S
+++ b/libc/arch-arm/cortex-a15/bionic/strlen.S
@@ -65,38 +65,38 @@ ENTRY(strlen)
mov r1, r0
ands r3, r0, #7
- beq mainloop
+ beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq align_to_32
+ beq .L_align_to_32
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
-align_to_32:
- bcc align_to_64
+.L_align_to_32:
+ bcc .L_align_to_64
ands ip, r3, #2
- beq align_to_64
+ beq .L_align_to_64
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
ldrb r2, [r1], #1
- cbz r2, update_count_and_return
+ cbz r2, .L_update_count_and_return
-align_to_64:
+.L_align_to_64:
tst r3, #4
- beq mainloop
+ beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne zero_in_second_register
+ bne .L_zero_in_second_register
.p2align 2
-mainloop:
+.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@@ -104,62 +104,62 @@ mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne zero_in_first_register
+ bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne zero_in_second_register
- b mainloop
+ bne .L_zero_in_second_register
+ b .L_mainloop
-update_count_and_return:
+.L_update_count_and_return:
sub r0, r1, r0
sub r0, r0, #1
bx lr
-zero_in_first_register:
+.L_zero_in_first_register:
sub r0, r1, r0
lsls r3, ip, #17
- bne sub8_and_return
- bcs sub7_and_return
+ bne .L_sub8_and_return
+ bcs .L_sub7_and_return
lsls ip, ip, #1
- bne sub6_and_return
+ bne .L_sub6_and_return
sub r0, r0, #5
bx lr
-sub8_and_return:
+.L_sub8_and_return:
sub r0, r0, #8
bx lr
-sub7_and_return:
+.L_sub7_and_return:
sub r0, r0, #7
bx lr
-sub6_and_return:
+.L_sub6_and_return:
sub r0, r0, #6
bx lr
-zero_in_second_register:
+.L_zero_in_second_register:
sub r0, r1, r0
lsls r3, ip, #17
- bne sub4_and_return
- bcs sub3_and_return
+ bne .L_sub4_and_return
+ bcs .L_sub3_and_return
lsls ip, ip, #1
- bne sub2_and_return
+ bne .L_sub2_and_return
sub r0, r0, #1
bx lr
-sub4_and_return:
+.L_sub4_and_return:
sub r0, r0, #4
bx lr
-sub3_and_return:
+.L_sub3_and_return:
sub r0, r0, #3
bx lr
-sub2_and_return:
+.L_sub2_and_return:
sub r0, r0, #2
bx lr
END(strlen)
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index 6fa3270..202a3bf 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
libc_bionic_src_files_arm += \
diff --git a/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk
new file mode 100644
index 0000000..5d7efc6
--- /dev/null
+++ b/libc/arch-arm/cortex-a53.a57/cortex-a53.a57.mk
@@ -0,0 +1,22 @@
+# This file represents the best optimized routines that are the middle
+# ground when running on a big/little system that is cortex-a57/cortex-a53.
+# The cortex-a7 optimized routines, and the cortex-a53 optimized routines
+# decrease performance on cortex-a57 processors by as much as 20%.
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/memset.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memcmp.S \
+ arch-arm/generic/bionic/memchr.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S
new file mode 100644
index 0000000..c5bc98a
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/__strcat_chk.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/__strcat_chk_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S
new file mode 100644
index 0000000..1f8945d
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/__strcpy_chk.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/__strcpy_chk_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy.S b/libc/arch-arm/cortex-a53/bionic/memcpy.S
new file mode 100644
index 0000000..664f574
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/memcpy.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Indicate which memcpy base file to include.
+#define MEMCPY_BASE "arch-arm/cortex-a53/bionic/memcpy_base.S"
+
+#include "arch-arm/cortex-a15/bionic/memcpy_common.S"
diff --git a/libc/arch-arm/cortex-a53/bionic/memcpy_base.S b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S
new file mode 100644
index 0000000..2749fc8
--- /dev/null
+++ b/libc/arch-arm/cortex-a53/bionic/memcpy_base.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+.L_memcpy_base:
+ // Assumes that n >= 0, and dst, src are valid pointers.
+ cmp r2, #16
+ blo .L_copy_less_than_16_unknown_align
+
+.L_copy_unknown_alignment:
+ // Unknown alignment of src and dst.
+ // Assumes that the first few bytes have already been prefetched.
+
+ // Align destination to 128 bits. The mainloop store instructions
+ // require this alignment or they will throw an exception.
+ rsb r3, r0, #0
+ ands r3, r3, #0xF
+ beq 2f
+
+ // Copy up to 15 bytes (count in r3).
+ sub r2, r2, r3
+ movs ip, r3, lsl #31
+
+ itt mi
+ ldrbmi lr, [r1], #1
+ strbmi lr, [r0], #1
+ itttt cs
+ ldrbcs ip, [r1], #1
+ ldrbcs lr, [r1], #1
+ strbcs ip, [r0], #1
+ strbcs lr, [r0], #1
+
+ movs ip, r3, lsl #29
+ bge 1f
+ // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
+1: bcc 2f
+ // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0, :64]!
+
+2: // Make sure we have at least 64 bytes to copy.
+ subs r2, r2, #64
+ blo 2f
+
+1: // The main loop copies 64 bytes at a time.
+ vld1.8 {d0 - d3}, [r1]!
+ vld1.8 {d4 - d7}, [r1]!
+ subs r2, r2, #64
+ vstmia r0!, {d0 - d7}
+ pld [r1, #(64*10)]
+ bhs 1b
+
+2: // Fix-up the remaining count and make sure we have >= 32 bytes left.
+ adds r2, r2, #32
+ blo 3f
+
+ // 32 bytes. These cache lines were already preloaded.
+ vld1.8 {d0 - d3}, [r1]!
+ sub r2, r2, #32
+ vst1.8 {d0 - d3}, [r0, :128]!
+3: // Less than 32 left.
+ add r2, r2, #32
+ tst r2, #0x10
+ beq .L_copy_less_than_16_unknown_align
+ // Copies 16 bytes, destination 128 bits aligned.
+ vld1.8 {d0, d1}, [r1]!
+ vst1.8 {d0, d1}, [r0, :128]!
+
+.L_copy_less_than_16_unknown_align:
+ // Copy up to 15 bytes (count in r2).
+ movs ip, r2, lsl #29
+ bcc 1f
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0]!
+1: bge 2f
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
+
+2: // Copy 0 to 4 bytes.
+ lsls r2, r2, #31
+ itt ne
+ ldrbne lr, [r1], #1
+ strbne lr, [r0], #1
+ itttt cs
+ ldrbcs ip, [r1], #1
+ ldrbcs lr, [r1]
+ strbcs ip, [r0], #1
+ strbcs lr, [r0]
+
+ pop {r0, pc}
diff --git a/libc/arch-arm/cortex-a53/cortex-a53.mk b/libc/arch-arm/cortex-a53/cortex-a53.mk
index b5c337c..14aaa71 100644
--- a/libc/arch-arm/cortex-a53/cortex-a53.mk
+++ b/libc/arch-arm/cortex-a53/cortex-a53.mk
@@ -1 +1,21 @@
-include bionic/libc/arch-arm/cortex-a7/cortex-a7.mk
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a53/bionic/memcpy.S \
+ arch-arm/cortex-a53/bionic/__strcat_chk.S \
+ arch-arm/cortex-a53/bionic/__strcpy_chk.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a7/bionic/memset.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a7/bionic/memset.S b/libc/arch-arm/cortex-a7/bionic/memset.S
new file mode 100644
index 0000000..6365b06
--- /dev/null
+++ b/libc/arch-arm/cortex-a7/bionic/memset.S
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/cpu-features.h>
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ /*
+ * Optimized memset() for ARM.
+ *
+ * memset() returns its first argument.
+ */
+
+ .fpu neon
+ .syntax unified
+
+ENTRY(__memset_chk)
+ cmp r2, r3
+ bls .L_done
+
+ // Preserve lr for backtrace.
+ push {lr}
+ .cfi_def_cfa_offset 4
+ .cfi_rel_offset lr, 0
+
+ ldr r0, error_message
+ ldr r1, error_code
+1:
+ add r0, pc
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
+error_message:
+ .word error_string-(1b+8)
+END(__memset_chk)
+
+ENTRY(bzero)
+ mov r2, r1
+ mov r1, #0
+.L_done:
+ // Fall through to memset...
+END(bzero)
+
+ENTRY(memset)
+ mov r3, r0
+ // At this point only d0, d1 are going to be used below.
+ vdup.8 q0, r1
+ cmp r2, #16
+ blo .L_set_less_than_16_unknown_align
+
+.L_check_alignment:
+ // Align destination to a double word to avoid the store crossing
+ // a cache line boundary.
+ ands ip, r3, #7
+ bne .L_do_double_word_align
+
+.L_double_word_aligned:
+ // Duplicate since the less than 64 can use d2, d3.
+ vmov q1, q0
+ subs r2, #64
+ blo .L_set_less_than_64
+
+ // Duplicate the copy value so that we can store 64 bytes at a time.
+ vmov q2, q0
+ vmov q3, q0
+
+1: // Main loop stores 64 bytes at a time.
+ subs r2, #64
+ vstmia r3!, {d0 - d7}
+ bge 1b
+
+.L_set_less_than_64:
+ // Restore r2 to the count of bytes left to set.
+ add r2, #64
+ lsls ip, r2, #27
+ bcc .L_set_less_than_32
+ // Set 32 bytes.
+ vstmia r3!, {d0 - d3}
+
+.L_set_less_than_32:
+ bpl .L_set_less_than_16
+ // Set 16 bytes.
+ vstmia r3!, {d0, d1}
+
+.L_set_less_than_16:
+ // Less than 16 bytes to set.
+ lsls ip, r2, #29
+ bcc .L_set_less_than_8
+
+ // Set 8 bytes.
+ vstmia r3!, {d0}
+
+.L_set_less_than_8:
+ bpl .L_set_less_than_4
+ // Set 4 bytes
+ vst1.32 {d0[0]}, [r3]!
+
+.L_set_less_than_4:
+ lsls ip, r2, #31
+ it ne
+ strbne r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3]
+ bx lr
+
+.L_do_double_word_align:
+ rsb ip, ip, #8
+ sub r2, r2, ip
+
+ // Do this comparison now, otherwise we'll need to save a
+ // register to the stack since we've used all available
+ // registers.
+ cmp ip, #4
+ blo 1f
+
+ // Need to do a four byte copy.
+ movs ip, ip, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ vst1.32 {d0[0]}, [r3]!
+ b .L_double_word_aligned
+
+1:
+ // No four byte copy.
+ movs ip, ip, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ b .L_double_word_aligned
+
+.L_set_less_than_16_unknown_align:
+ // Set up to 15 bytes.
+ movs ip, r2, lsl #29
+ bcc 1f
+ vst1.8 {d0}, [r3]!
+1: bge 2f
+ vst1.32 {d0[0]}, [r3]!
+2: movs ip, r2, lsl #31
+ it mi
+ strbmi r1, [r3], #1
+ itt cs
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
+ bx lr
+END(memset)
+
+ .data
+error_string:
+ .string "memset: prevented write past end of buffer"
diff --git a/libc/arch-arm/cortex-a7/cortex-a7.mk b/libc/arch-arm/cortex-a7/cortex-a7.mk
index 9af03d9..3629a57 100644
--- a/libc/arch-arm/cortex-a7/cortex-a7.mk
+++ b/libc/arch-arm/cortex-a7/cortex-a7.mk
@@ -1 +1,19 @@
-include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a7/bionic/memset.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcmp.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+ arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index 5e81305..6ab5a69 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -133,8 +133,7 @@ ENTRY_PRIVATE(MEMCPY_BASE)
strbcs ip, [r0], #1
strbcs lr, [r0], #1
- ldmfd sp!, {r0, lr}
- bx lr
+ ldmfd sp!, {r0, pc}
END(MEMCPY_BASE)
ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index 8ee6ac2..b39fcc4 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -69,12 +69,9 @@ END(bzero)
ENTRY(memset)
// The neon memset only wins for less than 132.
cmp r2, #132
- bhi __memset_large_copy
-
- stmfd sp!, {r0}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset r0, 0
+ bhi .L_memset_large_copy
+ mov r3, r0
vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */
@@ -84,7 +81,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
- vst1.8 {d0 - d3}, [r0]!
+ vst1.8 {d0 - d3}, [r3]!
bhs 1b
2: /* less than 32 left */
@@ -93,22 +90,20 @@ ENTRY(memset)
beq 3f
// writes 16 bytes, 128-bits aligned
- vst1.8 {d0, d1}, [r0]!
+ vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
- vst1.8 {d0}, [r0]!
+ vst1.8 {d0}, [r3]!
1: bge 2f
- vst1.32 {d0[0]}, [r0]!
+ vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31
- strbmi r1, [r0], #1
- strbcs r1, [r0], #1
- strbcs r1, [r0], #1
- ldmfd sp!, {r0}
+ strbmi r1, [r3], #1
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
bx lr
-END(memset)
-ENTRY_PRIVATE(__memset_large_copy)
+.L_memset_large_copy:
/* compute the offset to align the destination
* offset = (4-(src&3))&3 = -src & 3
*/
@@ -136,8 +131,7 @@ ENTRY_PRIVATE(__memset_large_copy)
strbcs r1, [r0], #1
strbmi r1, [r0], #1
subs r2, r2, r3
- popls {r0, r4-r7, lr} /* return */
- bxls lr
+ popls {r0, r4-r7, pc} /* return */
/* align the destination to a cache-line */
mov r12, r1
@@ -180,9 +174,8 @@ ENTRY_PRIVATE(__memset_large_copy)
strhmi r1, [r0], #2
movs r2, r2, lsl #2
strbcs r1, [r0]
- ldmfd sp!, {r0, r4-r7, lr}
- bx lr
-END(__memset_large_copy)
+ ldmfd sp!, {r0, r4-r7, pc}
+END(memset)
.data
error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/strcat.S b/libc/arch-arm/cortex-a9/bionic/strcat.S
index f5a855e..9077a74 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcat.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcat.S
@@ -70,7 +70,7 @@
.macro m_scan_byte
ldrb r3, [r0]
- cbz r3, strcat_r0_scan_done
+ cbz r3, .Lstrcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
@@ -84,10 +84,10 @@ ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
- cbnz r2, strcat_continue
+ cbnz r2, .Lstrcat_continue
bx lr
-strcat_continue:
+.Lstrcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
@@ -96,10 +96,10 @@ strcat_continue:
m_scan_byte
ands r3, r0, #7
- bne strcat_align_src
+ bne .Lstrcat_align_src
.p2align 2
-strcat_mainloop:
+.Lstrcat_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
@@ -107,28 +107,28 @@ strcat_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcat_zero_in_first_register
+ bne .Lstrcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .Lstrcat_zero_in_second_register
+ b .Lstrcat_mainloop
-strcat_zero_in_first_register:
+.Lstrcat_zero_in_first_register:
sub r0, r0, #4
-strcat_zero_in_second_register:
+.Lstrcat_zero_in_second_register:
// Check for zero in byte 0.
tst ip, #0x80
it ne
subne r0, r0, #4
- bne strcat_r0_scan_done
+ bne .Lstrcat_r0_scan_done
// Check for zero in byte 1.
tst ip, #0x8000
it ne
subne r0, r0, #3
- bne strcat_r0_scan_done
+ bne .Lstrcat_r0_scan_done
// Check for zero in byte 2.
tst ip, #0x800000
it ne
@@ -137,33 +137,33 @@ strcat_zero_in_second_register:
// Zero is in byte 3.
subeq r0, r0, #1
-strcat_r0_scan_done:
+.Lstrcat_r0_scan_done:
// Unroll the first 8 bytes that will be copied.
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
- m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
-
-strcpy_finish:
+ m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+ m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue
+
+.Lstrcpy_finish:
m_ret inst=pop
-strcpy_continue:
+.Lstrcpy_continue:
pld [r1, #0]
ands r3, r0, #7
- bne strcpy_align_dst
+ bne .Lstrcpy_align_dst
-strcpy_check_src_align:
+.Lstrcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
- bne strcpy_unaligned_copy
+ bne .Lstrcpy_unaligned_copy
.p2align 2
-strcpy_mainloop:
+.Lstrcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
@@ -171,17 +171,17 @@ strcpy_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_mainloop
+ b .Lstrcpy_mainloop
-strcpy_zero_in_first_register:
+.Lstrcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
@@ -198,7 +198,7 @@ strcpy_zero_in_first_register:
strb r3, [r0]
m_ret inst=pop
-strcpy_zero_in_second_register:
+.Lstrcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
@@ -218,18 +218,18 @@ strcpy_zero_in_second_register:
strb r4, [r0]
m_ret inst=pop
-strcpy_align_dst:
+.Lstrcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcpy_align_to_32
+ beq .Lstrcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
- cbz r2, strcpy_complete
+ cbz r2, .Lstrcpy_complete
-strcpy_align_to_32:
- bcc strcpy_align_to_64
+.Lstrcpy_align_to_32:
+ bcc .Lstrcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
@@ -242,76 +242,83 @@ strcpy_align_to_32:
it eq
m_ret inst=popeq
-strcpy_align_to_64:
+.Lstrcpy_align_to_64:
tst r3, #4
- beq strcpy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
- stmia r0!, {r2}
- b strcpy_check_src_align
+ beq .Lstrcpy_check_src_align
+ // Read one byte at a time since we don't know the src alignment
+ // and we don't want to read into a different page.
+ ldrb r4, [r1], #1
+ strb r4, [r0], #1
+ cbz r4, .Lstrcpy_complete
+ ldrb r5, [r1], #1
+ strb r5, [r0], #1
+ cbz r5, .Lstrcpy_complete
+ ldrb r4, [r1], #1
+ strb r4, [r0], #1
+ cbz r4, .Lstrcpy_complete
+ ldrb r5, [r1], #1
+ strb r5, [r0], #1
+ cbz r5, .Lstrcpy_complete
+ b .Lstrcpy_check_src_align
-strcpy_complete:
+.Lstrcpy_complete:
m_ret inst=pop
-strcpy_unaligned_copy:
+.Lstrcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
-strcpy_unaligned_branchtable:
+.Lstrcpy_unaligned_branchtable:
.byte 0
- .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
- .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
+ .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.Lstrcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r3, [r1]
- cbz r3, strcpy_unalign7_copy5bytes
+ cbz r3, .Lstrcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
- cbz r4, strcpy_unalign7_copy6bytes
+ cbz r4, .Lstrcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
- cbz r5, strcpy_unalign7_copy7bytes
+ cbz r5, .Lstrcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
- beq strcpy_unalign_return
- b strcpy_unalign7
+ beq .Lstrcpy_unalign_return
+ b .Lstrcpy_unalign7
-strcpy_unalign7_copy5bytes:
+.Lstrcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
-strcpy_unalign_return:
+.Lstrcpy_unalign_return:
m_ret inst=pop
-strcpy_unalign7_copy6bytes:
+.Lstrcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
-strcpy_unalign7_copy7bytes:
+.Lstrcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
@@ -320,30 +327,30 @@ strcpy_unalign7_copy7bytes:
.p2align 2
// Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.Lstrcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .Lstrcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
- cbz r5, strcpy_unalign_copy6bytes
+ cbz r5, .Lstrcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
- beq strcpy_unalign6_copy7bytes
+ beq .Lstrcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
- beq strcpy_unalign_return
- b strcpy_unalign6
+ beq .Lstrcpy_unalign_return
+ b .Lstrcpy_unalign6
-strcpy_unalign6_copy7bytes:
+.Lstrcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
@@ -352,16 +359,16 @@ strcpy_unalign6_copy7bytes:
.p2align 2
// Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.Lstrcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldrb r4, [r1]
- cbz r4, strcpy_unalign_copy5bytes
+ cbz r4, .Lstrcpy_unalign_copy5bytes
ldr r3, [r1], #4
@@ -370,17 +377,17 @@ strcpy_unalign5:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign5
+ b .Lstrcpy_unalign5
-strcpy_unalign_copy5bytes:
+.Lstrcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
-strcpy_unalign_copy6bytes:
+.Lstrcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
@@ -388,13 +395,13 @@ strcpy_unalign_copy6bytes:
.p2align 2
// Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.Lstrcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
@@ -402,20 +409,20 @@ strcpy_unalign4:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign4
+ b .Lstrcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.Lstrcpy_unalign3:
ldrb r2, [r1]
- cbz r2, strcpy_unalign3_copy1byte
+ cbz r2, .Lstrcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign3_copy2bytes
+ cbz r3, .Lstrcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
- cbz r4, strcpy_unalign3_copy3bytes
+ cbz r4, .Lstrcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -423,26 +430,26 @@ strcpy_unalign3:
pld [r1, #64]
lsrs lr, r2, #24
- beq strcpy_unalign_copy4bytes
+ beq .Lstrcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign3
+ b .Lstrcpy_unalign3
-strcpy_unalign3_copy1byte:
+.Lstrcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign3_copy2bytes:
+.Lstrcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
-strcpy_unalign3_copy3bytes:
+.Lstrcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
@@ -450,34 +457,34 @@ strcpy_unalign3_copy3bytes:
.p2align 2
// Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.Lstrcpy_unalign2:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .Lstrcpy_unalign_copy1byte
ldrb r3, [r1, #1]
- cbz r3, strcpy_unalign_copy2bytes
+ cbz r3, .Lstrcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
- beq strcpy_unalign_copy3bytes
+ beq .Lstrcpy_unalign_copy3bytes
lsrs ip, r2, #24
- beq strcpy_unalign_copy4bytes
+ beq .Lstrcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign2
+ b .Lstrcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.Lstrcpy_unalign1:
ldrb r2, [r1]
- cbz r2, strcpy_unalign_copy1byte
+ cbz r2, .Lstrcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
@@ -487,62 +494,62 @@ strcpy_unalign1:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
- bne strcpy_zero_in_first_register
+ bne .Lstrcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcpy_zero_in_second_register
+ bne .Lstrcpy_zero_in_second_register
stmia r0!, {r2, r3}
- b strcpy_unalign1
+ b .Lstrcpy_unalign1
-strcpy_unalign_copy1byte:
+.Lstrcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign_copy2bytes:
+.Lstrcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
-strcpy_unalign_copy3bytes:
+.Lstrcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
-strcpy_unalign_copy4bytes:
+.Lstrcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
-strcat_align_src:
+.Lstrcat_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
- beq strcat_align_to_32
+ beq .Lstrcat_align_to_32
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
-strcat_align_to_32:
- bcc strcat_align_to_64
+.Lstrcat_align_to_32:
+ bcc .Lstrcat_align_to_64
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
ldrb r2, [r0], #1
- cbz r2, strcat_r0_update
+ cbz r2, .Lstrcat_r0_update
-strcat_align_to_64:
+.Lstrcat_align_to_64:
tst r3, #4
- beq strcat_mainloop
+ beq .Lstrcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
- bne strcat_zero_in_second_register
- b strcat_mainloop
+ bne .Lstrcat_zero_in_second_register
+ b .Lstrcat_mainloop
-strcat_r0_update:
+.Lstrcat_r0_update:
sub r0, r0, #1
- b strcat_r0_scan_done
+ b .Lstrcat_r0_scan_done
END(strcat)
diff --git a/libc/arch-arm/cortex-a9/bionic/string_copy.S b/libc/arch-arm/cortex-a9/bionic/string_copy.S
index caf5a11..642db0f 100644
--- a/libc/arch-arm/cortex-a9/bionic/string_copy.S
+++ b/libc/arch-arm/cortex-a9/bionic/string_copy.S
@@ -244,13 +244,20 @@ ENTRY(strcpy)
.Lstringcopy_align_to_64:
tst r3, #4
beq .Lstringcopy_check_src_align
- ldr r2, [r1], #4
-
- sub ip, r2, #0x01010101
- bic ip, ip, r2
- ands ip, ip, #0x80808080
- bne .Lstringcopy_zero_in_first_register
- stmia r0!, {r2}
+ // Read one byte at a time since we don't have any idea about the alignment
+ // of the source and we don't want to read into a different page.
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
+ ldrb r2, [r1], #1
+ strb r2, [r0], #1
+ cbz r2, .Lstringcopy_complete
b .Lstringcopy_check_src_align
.Lstringcopy_complete:
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 7b38de1..db4bcc7 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -10,6 +10,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a9/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
libc_bionic_src_files_arm += \
diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk
index 5fddf95..e81f8c7 100644
--- a/libc/arch-arm/denver/denver.mk
+++ b/libc/arch-arm/denver/denver.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
arch-arm/denver/bionic/memcpy.S \
arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/generic/bionic/memchr.S b/libc/arch-arm/generic/bionic/memchr.S
new file mode 100644
index 0000000..cb00d82
--- /dev/null
+++ b/libc/arch-arm/generic/bionic/memchr.S
@@ -0,0 +1,155 @@
+/* Copyright (c) 2010-2015, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Linaro Limited nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ Written by Dave Gilbert <david.gilbert@linaro.org>
+
+ This memchr routine is optimised on a Cortex-A9 and should work on
+ all ARMv7 processors. It has a fast past for short sizes, and has
+ an optimised path for large data sets; the worst case is finding the
+ match early in a large data set.
+
+ */
+
+#include <private/bionic_asm.h>
+
+@ 2011-02-07 david.gilbert@linaro.org
+@ Extracted from local git a5b438d861
+@ 2011-07-14 david.gilbert@linaro.org
+@ Import endianness fix from local git ea786f1b
+@ 2011-12-07 david.gilbert@linaro.org
+@ Removed unneeded cbz from align loop
+
+ .syntax unified
+ .arch armv7-a
+
+@ this lets us check a flag in a 00/ff byte easily in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) 1<<(31-(c*8))
+#else
+#define CHARTSTMASK(c) 1<<(c*8)
+#endif
+ .text
+ .thumb
+
+@ ---------------------------------------------------------------------------
+ .thumb_func
+ENTRY(memchr)
+ .p2align 4,,15
+ @ r0 = start of memory to scan
+ @ r1 = character to look for
+ @ r2 = length
+ @ returns r0 = pointer to character or NULL if not found
+ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
+
+ cmp r2,#16 @ If it's short don't bother with anything clever
+ blt 20f
+
+ tst r0, #7 @ If it's already aligned skip the next bit
+ beq 10f
+
+ @ Work up to an aligned point
+5:
+ ldrb r3, [r0],#1
+ subs r2, r2, #1
+ cmp r3, r1
+ beq 50f @ If it matches exit found
+ tst r0, #7
+ bne 5b @ If not aligned yet then do next byte
+
+10:
+ @ At this point, we are aligned, we know we have at least 8 bytes to work with
+ push {r4,r5,r6,r7}
+ orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
+ orr r1, r1, r1, lsl #16
+ bic r4, r2, #7 @ Number of double words to work with
+ mvns r7, #0 @ all F's
+ movs r3, #0
+
+15:
+ ldrd r5,r6,[r0],#8
+ subs r4, r4, #8
+ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
+ eor r6,r6, r1
+ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ cbnz r6, 60f
+ bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
+
+ pop {r4,r5,r6,r7}
+ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
+ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
+
+20:
+ cbz r2, 40f @ 0 length or hit the end already then not found
+
+21: @ Post aligned section, or just a short call
+ ldrb r3,[r0],#1
+ subs r2,r2,#1
+ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
+ cbz r3, 50f
+ bne 21b @ on r2 flags
+
+40:
+ movs r0,#0 @ not found
+ bx lr
+
+50:
+ subs r0,r0,#1 @ found
+ bx lr
+
+60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
+ @ r0 points to the start of the double word after the one that was tested
+ @ r5 has the 00/ff pattern for the first word, r6 has the chained value
+ cmp r5, #0
+ itte eq
+ moveq r5, r6 @ the end is in the 2nd word
+ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
+ subne r0,r0,#7 @ or 2nd byte of 1st word
+
+ @ r0 currently points to the 3rd byte of the word containing the hit
+ tst r5, # CHARTSTMASK(0) @ 1st character
+ bne 61f
+ adds r0,r0,#1
+ tst r5, # CHARTSTMASK(1) @ 2nd character
+ ittt eq
+ addeq r0,r0,#1
+ tsteq r5, # (3<<15) @ 2nd & 3rd character
+ @ If not the 3rd must be the last one
+ addeq r0,r0,#1
+
+61:
+ pop {r4,r5,r6,r7}
+ subs r0,r0,#1
+ bx lr
+END(memchr)
diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S
index c78dbd4..6643d55 100644
--- a/libc/arch-arm/generic/bionic/memcmp.S
+++ b/libc/arch-arm/generic/bionic/memcmp.S
@@ -221,8 +221,7 @@ ENTRY(memcmp)
bne 8b
9: /* restore registers and return */
- ldmfd sp!, {r4, lr}
- bx lr
+ ldmfd sp!, {r4, pc}
10: /* process less than 12 bytes */
cmp r2, #0
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index ea5a399..65cba4c 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -194,8 +194,7 @@ ENTRY(memcpy)
/* we're done! restore everything and return */
1: ldmfd sp!, {r5-r11}
- ldmfd sp!, {r0, r4, lr}
- bx lr
+ ldmfd sp!, {r0, r4, pc}
/********************************************************************/
@@ -385,8 +384,7 @@ ENTRY(memcpy)
/* we're done! restore sp and spilled registers and return */
add sp, sp, #28
- ldmfd sp!, {r0, r4, lr}
- bx lr
+ ldmfd sp!, {r0, r4, pc}
END(memcpy)
// Only reached when the __memcpy_chk check fails.
diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S
index d17a9c4..b8eabbf 100644
--- a/libc/arch-arm/generic/bionic/memset.S
+++ b/libc/arch-arm/generic/bionic/memset.S
@@ -82,8 +82,7 @@ ENTRY(memset)
strbcs r1, [r0], #1
strbmi r1, [r0], #1
subs r2, r2, r3
- popls {r0, r4-r7, lr} /* return */
- bxls lr
+ popls {r0, r4-r7, pc} /* return */
/* align the destination to a cache-line */
mov r12, r1
@@ -126,8 +125,7 @@ ENTRY(memset)
strhmi r1, [r0], #2
movs r2, r2, lsl #2
strbcs r1, [r0]
- ldmfd sp!, {r0, r4-r7, lr}
- bx lr
+ ldmfd sp!, {r0, r4-r7, pc}
END(memset)
.data
diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk
index e49d6d2..016c882 100644
--- a/libc/arch-arm/generic/generic.mk
+++ b/libc/arch-arm/generic/generic.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
arch-arm/generic/bionic/memcpy.S \
arch-arm/generic/bionic/memset.S \
diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S
index 246f159..1a39c5b 100644
--- a/libc/arch-arm/krait/bionic/__strcat_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcat_chk.S
@@ -40,7 +40,7 @@
ENTRY(__strcat_chk)
pld [r0, #0]
push {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
@@ -177,7 +177,7 @@ ENTRY(__strcat_chk)
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
- bhi __strcat_chk_failed
+ bhi .L_strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
@@ -185,20 +185,17 @@ ENTRY(__strcat_chk)
mov r2, r4
add r0, r0, r3
pop {r4, r5}
-END(__strcat_chk)
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r4
+ .cfi_restore r5
-#define MEMCPY_BASE __strcat_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__strcat_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
+ // Undo the above cfi directives.
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
-
+.L_strcat_chk_failed:
ldr r0, error_message
ldr r1, error_code
1:
@@ -208,7 +205,7 @@ error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__strcat_chk_failed)
+END(__strcat_chk)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S
index db76686..00202f3 100644
--- a/libc/arch-arm/krait/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S
@@ -39,7 +39,7 @@
ENTRY(__strcpy_chk)
pld [r0, #0]
push {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
@@ -149,21 +149,14 @@ ENTRY(__strcpy_chk)
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
- bhs __strcpy_chk_failed
+ bhs .L_strcpy_chk_failed
// Add 1 for copy length to get the string terminator.
add r2, r3, #1
-END(__strcpy_chk)
-#define MEMCPY_BASE __strcpy_chk_memcpy_base
-#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__strcpy_chk_failed)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
+.L_strcpy_chk_failed:
ldr r0, error_message
ldr r1, error_code
1:
@@ -173,7 +166,7 @@ error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__strcpy_chk_failed)
+END(__strcpy_chk)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S
index 9ff46a8..5d27b57 100644
--- a/libc/arch-arm/krait/bionic/memcpy.S
+++ b/libc/arch-arm/krait/bionic/memcpy.S
@@ -45,7 +45,7 @@
ENTRY(__memcpy_chk)
cmp r2, r3
- bhi __memcpy_chk_fail
+ bhi .L_memcpy_chk_fail
// Fall through to memcpy...
END(__memcpy_chk)
@@ -53,19 +53,20 @@ END(__memcpy_chk)
ENTRY(memcpy)
pld [r1, #64]
stmfd sp!, {r0, lr}
- .cfi_def_cfa_offset 8
+ .cfi_adjust_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
-END(memcpy)
-#define MEMCPY_BASE __memcpy_base
-#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
-ENTRY_PRIVATE(__memcpy_chk_fail)
+ // Undo the cfi directives from above.
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r0
+ .cfi_restore lr
+.L_memcpy_chk_fail:
// Preserve lr for backtrace.
push {lr}
- .cfi_def_cfa_offset 4
+ .cfi_adjust_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
@@ -77,7 +78,7 @@ error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
-END(__memcpy_chk_fail)
+END(memcpy)
.data
error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 035dcf1..76c5a84 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -1,123 +1,191 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-
-/*
- * This code assumes it is running on a processor that supports all arm v7
- * instructions, that supports neon instructions, and that has a 32 byte
- * cache line.
- */
-
-// Assumes neon instructions and a cache line size of 32 bytes.
-
-ENTRY_PRIVATE(MEMCPY_BASE)
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- /* do we have at least 16-bytes to copy (needed for alignment below) */
- cmp r2, #16
- blo 5f
-
- /* align destination to cache-line for the write-buffer */
- rsb r3, r0, #0
- ands r3, r3, #0xF
- beq 2f
-
- /* copy up to 15-bytes (count in r3) */
- sub r2, r2, r3
- movs ip, r3, lsl #31
- itt mi
- ldrbmi lr, [r1], #1
- strbmi lr, [r0], #1
- itttt cs
- ldrbcs ip, [r1], #1
- ldrbcs lr, [r1], #1
- strbcs ip, [r0], #1
- strbcs lr, [r0], #1
- movs ip, r3, lsl #29
- bge 1f
- // copies 4 bytes, destination 32-bits aligned
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
-1: bcc 2f
- // copies 8 bytes, destination 64-bits aligned
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0, :64]!
-
-2: /* make sure we have at least 64 bytes to copy */
- subs r2, r2, #64
- blo 2f
-
-1: /* The main loop copies 64 bytes at a time */
- vld1.8 {d0 - d3}, [r1]!
- vld1.8 {d4 - d7}, [r1]!
- pld [r1, #(32*8)]
- subs r2, r2, #64
- vst1.8 {d0 - d3}, [r0, :128]!
- vst1.8 {d4 - d7}, [r0, :128]!
- bhs 1b
-
-2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
- adds r2, r2, #32
- blo 4f
-
- /* Copy 32 bytes. These cache lines were already preloaded */
- vld1.8 {d0 - d3}, [r1]!
- sub r2, r2, #32
- vst1.8 {d0 - d3}, [r0, :128]!
-
-4: /* less than 32 left */
- add r2, r2, #32
- tst r2, #0x10
- beq 5f
- // copies 16 bytes, 128-bits aligned
- vld1.8 {d0, d1}, [r1]!
- vst1.8 {d0, d1}, [r0, :128]!
-
-5: /* copy up to 15-bytes (count in r2) */
- movs ip, r2, lsl #29
- bcc 1f
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0]!
-1: bge 2f
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
-2: movs ip, r2, lsl #31
- itt mi
- ldrbmi r3, [r1], #1
- strbmi r3, [r0], #1
- itttt cs
- ldrbcs ip, [r1], #1
- ldrbcs lr, [r1], #1
- strbcs ip, [r0], #1
- strbcs lr, [r0], #1
-
- ldmfd sp!, {r0, lr}
- bx lr
-END(MEMCPY_BASE)
+/***************************************************************************
+ Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of The Linux Foundation nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/* Assumes neon instructions and a cache line size of 64 bytes. */
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+#define PLDOFFS (10)
+#define PLDTHRESH (PLDOFFS)
+#define BBTHRESH (4096/64)
+#define PLDSIZE (64)
+
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+
+ .text
+ .fpu neon
+
+.L_memcpy_base:
+ cmp r2, #4
+ blt .L_neon_lt4
+ cmp r2, #16
+ blt .L_neon_lt16
+ cmp r2, #32
+ blt .L_neon_16
+ cmp r2, #64
+ blt .L_neon_copy_32_a
+
+ mov r12, r2, lsr #6
+ cmp r12, #PLDTHRESH
+ ble .L_neon_copy_64_loop_nopld
+
+ push {r9, r10}
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset r9, 0
+ .cfi_rel_offset r10, 4
+
+ cmp r12, #BBTHRESH
+ ble .L_neon_prime_pump
+
+ add lr, r0, #0x400
+ add r9, r1, #(PLDOFFS*PLDSIZE)
+ sub lr, lr, r9
+ lsl lr, lr, #21
+ lsr lr, lr, #21
+ add lr, lr, #(PLDOFFS*PLDSIZE)
+ cmp r12, lr, lsr #6
+ ble .L_neon_prime_pump
+
+ itt gt
+ movgt r9, #(PLDOFFS)
+ rsbsgt r9, r9, lr, lsr #6
+ ble .L_neon_prime_pump
+
+ add r10, r1, lr
+ bic r10, #0x3F
+
+ sub r12, r12, lr, lsr #6
+
+ cmp r9, r12
+ itee le
+ suble r12, r12, r9
+ movgt r9, r12
+ movgt r12, #0
+
+ pld [r1, #((PLDOFFS-1)*PLDSIZE)]
+.L_neon_copy_64_loop_outer_doublepld:
+ pld [r1, #((PLDOFFS)*PLDSIZE)]
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r9, r9, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_outer_doublepld
+ cmp r12, #0
+ beq .L_neon_pop_before_nopld
+
+ cmp r12, #(512*1024/64)
+ blt .L_neon_copy_64_loop_outer
+
+.L_neon_copy_64_loop_ddr:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ pld [r10]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_ddr
+ b .L_neon_pop_before_nopld
+
+.L_neon_prime_pump:
+ mov lr, #(PLDOFFS*PLDSIZE)
+ add r10, r1, #(PLDOFFS*PLDSIZE)
+ bic r10, #0x3F
+ sub r12, r12, #PLDOFFS
+ ldr r3, [r10, #(-1*PLDSIZE)]
+
+.L_neon_copy_64_loop_outer:
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]!
+ ldr r3, [r10]
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]!
+ add r10, #64
+ bne .L_neon_copy_64_loop_outer
+
+.L_neon_pop_before_nopld:
+ mov r12, lr, lsr #6
+ pop {r9, r10}
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore r9
+ .cfi_restore r10
+
+.L_neon_copy_64_loop_nopld:
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]!
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]!
+ bne .L_neon_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ beq .L_neon_exit
+
+.L_neon_copy_32_a:
+ movs r3, r2, lsl #27
+ bcc .L_neon_16
+ vld1.32 {q0,q1}, [r1]!
+ vst1.32 {q0,q1}, [r0]!
+
+.L_neon_16:
+ bpl .L_neon_lt16
+ vld1.32 {q8}, [r1]!
+ vst1.32 {q8}, [r0]!
+ ands r2, r2, #0x0f
+ beq .L_neon_exit
+
+.L_neon_lt16:
+ movs r3, r2, lsl #29
+ bcc 1f
+ vld1.8 {d0}, [r1]!
+ vst1.8 {d0}, [r0]!
+1:
+ bge .L_neon_lt4
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
+
+.L_neon_lt4:
+ movs r2, r2, lsl #31
+ itt cs
+ ldrhcs r3, [r1], #2
+ strhcs r3, [r0], #2
+ itt mi
+ ldrbmi r3, [r1]
+ strbmi r3, [r0]
+
+.L_neon_exit:
+ pop {r0, pc}
diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S
new file mode 100644
index 0000000..aea7315
--- /dev/null
+++ b/libc/arch-arm/krait/bionic/memmove.S
@@ -0,0 +1,219 @@
+/***************************************************************************
+ Copyright (c) 2009-2014 The Linux Foundation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of The Linux Foundation nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/***************************************************************************
+ * Neon memmove: Attempts to do a memmove with Neon registers if possible,
+ * Inputs:
+ * dest: The destination buffer
+ * src: The source buffer
+ * n: The size of the buffer to transfer
+ * Outputs:
+ *
+ ***************************************************************************/
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+/*
+ * These can be overridden in:
+ * device/<vendor>/<board>/BoardConfig.mk
+ * by setting the following:
+ * TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
+ * TARGET_USE_KRAIT_PLD_SET := true
+ * TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset>
+ * TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize>
+ * TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold>
+ */
+#ifndef PLDOFFS
+#define PLDOFFS (10)
+#endif
+#ifndef PLDTHRESH
+#define PLDTHRESH (PLDOFFS)
+#endif
+#if (PLDOFFS < 5)
+#error Routine does not support offsets less than 5
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+#ifndef PLDSIZE
+#define PLDSIZE (64)
+#endif
+
+ .text
+ .syntax unified
+ .fpu neon
+ .thumb
+ .thumb_func
+
+//ENTRY(bcopy)
+// //.cfi_startproc
+// mov r12, r0
+// mov r0, r1
+// mov r1, r12
+// // Fall through to memmove
+// //.cfi_endproc
+//END(bcopy)
+
+ENTRY(memmove)
+_memmove_words:
+ //.cfi_startproc
+ .save {r0, lr}
+ cmp r2, #0
+ it ne
+ subsne r12, r0, r1 // Warning: do not combine these "it" blocks
+ it eq
+ bxeq lr
+// memmove only if r1 < r0 < r1+r2
+ cmp r0, r1
+ itt ge
+ addge r12, r1, r2
+ cmpge r12, r0
+ it le
+ ble memcpy
+ cmp r2, #4
+ it le
+ ble .Lneon_b2f_smallcopy_loop
+ push {r0, lr}
+ add r0, r0, r2
+ add r1, r1, r2
+ cmp r2, #64
+ it ge
+ bge .Lneon_b2f_copy_64
+ cmp r2, #32
+ it ge
+ bge .Lneon_b2f_copy_32
+ cmp r2, #8
+ it ge
+ bge .Lneon_b2f_copy_8
+ b .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
+ mov r12, r2, lsr #6
+ add r0, r0, #32
+ add r1, r1, #32
+ cmp r12, #PLDTHRESH
+ it le
+ ble .Lneon_b2f_copy_64_loop_nopld
+ sub r12, #PLDOFFS
+ sub lr, r1, #(PLDOFFS)*PLDSIZE
+.Lneon_b2f_copy_64_loop_outer:
+ pld [lr]
+ sub r1, r1, #96
+ sub r0, r0, #96
+ vld1.32 {q0, q1}, [r1]!
+ vld1.32 {q2, q3}, [r1]
+ sub lr, lr, #64
+ subs r12, r12, #1
+ vst1.32 {q0, q1}, [r0]!
+ vst1.32 {q2, q3}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_64_loop_outer
+ mov r12, #PLDOFFS
+.Lneon_b2f_copy_64_loop_nopld:
+ sub r1, r1, #96
+ sub r0, r0, #96
+ vld1.32 {q8, q9}, [r1]!
+ vld1.32 {q10, q11}, [r1]
+ subs r12, r12, #1
+ vst1.32 {q8, q9}, [r0]!
+ vst1.32 {q10, q11}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_64_loop_nopld
+ ands r2, r2, #0x3f
+ it eq
+ beq .Lneon_memmove_done
+ sub r1, r1, #32
+ sub r0, r0, #32
+ cmp r2, #32
+ it lt
+ blt .Lneon_b2f_copy_8
+.Lneon_b2f_copy_32:
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.32 {q0, q1}, [r1]
+ vst1.32 {q0, q1}, [r0]
+ ands r2, r2, #0x1f
+ it eq
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_8:
+ movs r12, r2, lsr #0x3
+ it eq
+ beq .Lneon_b2f_copy_1
+.Lneon_b2f_copy_8_loop:
+ sub r1, r1, #8
+ sub r0, r0, #8
+ vld1.32 {d0}, [r1]
+ subs r12, r12, #1
+ vst1.32 {d0}, [r0]
+ it ne
+ bne .Lneon_b2f_copy_8_loop
+ ands r2, r2, #0x7
+ beq .Lneon_memmove_done
+.Lneon_b2f_copy_1:
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
+.Lneon_memmove_done:
+ pop {r0, pc}
+.Lneon_b2f_smallcopy_loop:
+ // 4 bytes or less
+ add r1, r1, r2
+ add r0, r0, r2
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
+ bx lr
+// .cfi_endproc
+END(memmove)
+
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index a4fbe17..ae05965 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -69,10 +69,7 @@ END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
- stmfd sp!, {r0}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset r0, 0
-
+ mov r3, r0
vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */
@@ -82,7 +79,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
- vst1.8 {d0 - d3}, [r0]!
+ vst1.8 {d0 - d3}, [r3]!
bhs 1b
2: /* less than 32 left */
@@ -91,18 +88,17 @@ ENTRY(memset)
beq 3f
// writes 16 bytes, 128-bits aligned
- vst1.8 {d0, d1}, [r0]!
+ vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
- vst1.8 {d0}, [r0]!
+ vst1.8 {d0}, [r3]!
1: bge 2f
- vst1.32 {d0[0]}, [r0]!
+ vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31
- strbmi r1, [r0], #1
- strbcs r1, [r0], #1
- strbcs r1, [r0], #1
- ldmfd sp!, {r0}
+ strbmi r1, [r3], #1
+ strbcs r1, [r3], #1
+ strbcs r1, [r3], #1
bx lr
END(memset)
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 88b4d66..5f5b414 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -1,9 +1,19 @@
libc_bionic_src_files_arm += \
- arch-arm/krait/bionic/memcpy.S \
arch-arm/krait/bionic/memset.S \
arch-arm/krait/bionic/strcmp.S \
arch-arm/krait/bionic/__strcat_chk.S \
arch-arm/krait/bionic/__strcpy_chk.S \
+ arch-arm/krait/bionic/memmove.S
+
+#For some targets we don't need this optimization.
+#Corresponding flag is defined in device specific folder.
+ifeq ($(TARGET_CPU_MEMCPY_BASE_OPT_DISABLE),true)
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S
+else
+libc_bionic_src_files_arm += \
+ arch-arm/krait/bionic/memcpy.S
+endif
# Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove
libc_bionic_src_files_arm += \
@@ -13,7 +23,7 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strlen.S \
libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
arch-arm/generic/bionic/memcmp.S \
-libc_bionic_src_files_arm += \
- arch-arm/denver/bionic/memmove.S \
+
diff --git a/libc/arch-arm/scorpion/scorpion.mk b/libc/arch-arm/scorpion/scorpion.mk
new file mode 100644
index 0000000..ce18a7e
--- /dev/null
+++ b/libc/arch-arm/scorpion/scorpion.mk
@@ -0,0 +1,18 @@
+# Use krait versions of memset/strcmp/memmove
+libc_bionic_src_files_arm += \
+ arch-arm/krait/bionic/memset.S \
+ arch-arm/krait/bionic/strcmp.S \
+ arch-arm/krait/bionic/memmove.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/cortex-a15/bionic/memcpy.S \
+ arch-arm/cortex-a15/bionic/stpcpy.S \
+ arch-arm/cortex-a15/bionic/strcat.S \
+ arch-arm/cortex-a15/bionic/__strcat_chk.S \
+ arch-arm/cortex-a15/bionic/strcpy.S \
+ arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ arch-arm/cortex-a15/bionic/strlen.S
+
+libc_bionic_src_files_arm += \
+ arch-arm/generic/bionic/memchr.S \
+ arch-arm/generic/bionic/memcmp.S