diff options
author | Christopher Ferris <cferris@google.com> | 2015-07-23 20:27:42 -0700 |
---|---|---|
committer | Steve Kondik <shade@chemlab.org> | 2015-10-29 22:48:08 -0700 |
commit | 73385a8d1848d324ca1a03bcc920877bf557c720 (patch) | |
tree | 2aef84a19c7396d3ea8391e35fda109a99068145 /libc/arch-arm/cortex-a7 | |
parent | f8a907d25a9f319e67fcf005638adb52fa09dd8b (diff) | |
download | bionic-73385a8d1848d324ca1a03bcc920877bf557c720.zip bionic-73385a8d1848d324ca1a03bcc920877bf557c720.tar.gz bionic-73385a8d1848d324ca1a03bcc920877bf557c720.tar.bz2 |
Add optimized cortex-a7/cortex-a53 memset/memcpy.
Add an optimized memset that is ~20% faster for cortex-a7 and
cortex-a53.
Add a 32 bit optimized cortex-a53 memcpy that is about ~20% faster
on cached data.
Fix the cortex-a15 __str{cat,cpy}_chk.S, memcpy_base.S to remove
the phony functions, since they aren't needed any more. Then add
a direct include of these for cortex-a53.
Verified the new functions by stepping through all of the major
paths and verifying the backtrace is still correct.
Bug: 22696180
Change-Id: Iec92a3f82d51243cca76c9aff9f35d920ff865ae
Diffstat (limited to 'libc/arch-arm/cortex-a7')
-rw-r--r-- | libc/arch-arm/cortex-a7/bionic/memset.S | 180 | ||||
-rw-r--r-- | libc/arch-arm/cortex-a7/cortex-a7.mk | 19 |
2 files changed, 198 insertions, 1 deletions
diff --git a/libc/arch-arm/cortex-a7/bionic/memset.S b/libc/arch-arm/cortex-a7/bionic/memset.S new file mode 100644 index 0000000..6365b06 --- /dev/null +++ b/libc/arch-arm/cortex-a7/bionic/memset.S @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/cpu-features.h> +#include <private/bionic_asm.h> +#include <private/libc_events.h> + + /* + * Optimized memset() for ARM. + * + * memset() returns its first argument. + */ + + .fpu neon + .syntax unified + +ENTRY(__memset_chk) + cmp r2, r3 + bls .L_done + + // Preserve lr for backtrace. + push {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(__memset_chk) + +ENTRY(bzero) + mov r2, r1 + mov r1, #0 +.L_done: + // Fall through to memset... +END(bzero) + +ENTRY(memset) + mov r3, r0 + // At this point only d0, d1 are going to be used below. + vdup.8 q0, r1 + cmp r2, #16 + blo .L_set_less_than_16_unknown_align + +.L_check_alignment: + // Align destination to a double word to avoid the store crossing + // a cache line boundary. + ands ip, r3, #7 + bne .L_do_double_word_align + +.L_double_word_aligned: + // Duplicate since the less than 64 can use d2, d3. + vmov q1, q0 + subs r2, #64 + blo .L_set_less_than_64 + + // Duplicate the copy value so that we can store 64 bytes at a time. + vmov q2, q0 + vmov q3, q0 + +1: // Main loop stores 64 bytes at a time. + subs r2, #64 + vstmia r3!, {d0 - d7} + bge 1b + +.L_set_less_than_64: + // Restore r2 to the count of bytes left to set. + add r2, #64 + lsls ip, r2, #27 + bcc .L_set_less_than_32 + // Set 32 bytes. + vstmia r3!, {d0 - d3} + +.L_set_less_than_32: + bpl .L_set_less_than_16 + // Set 16 bytes. + vstmia r3!, {d0, d1} + +.L_set_less_than_16: + // Less than 16 bytes to set. + lsls ip, r2, #29 + bcc .L_set_less_than_8 + + // Set 8 bytes. + vstmia r3!, {d0} + +.L_set_less_than_8: + bpl .L_set_less_than_4 + // Set 4 bytes + vst1.32 {d0[0]}, [r3]! + +.L_set_less_than_4: + lsls ip, r2, #31 + it ne + strbne r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3] + bx lr + +.L_do_double_word_align: + rsb ip, ip, #8 + sub r2, r2, ip + + // Do this comparison now, otherwise we'll need to save a + // register to the stack since we've used all available + // registers. + cmp ip, #4 + blo 1f + + // Need to do a four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + vst1.32 {d0[0]}, [r3]! + b .L_double_word_aligned + +1: + // No four byte copy. + movs ip, ip, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + b .L_double_word_aligned + +.L_set_less_than_16_unknown_align: + // Set up to 15 bytes. + movs ip, r2, lsl #29 + bcc 1f + vst1.8 {d0}, [r3]! +1: bge 2f + vst1.32 {d0[0]}, [r3]! +2: movs ip, r2, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3], #1 + bx lr +END(memset) + + .data +error_string: + .string "memset: prevented write past end of buffer" diff --git a/libc/arch-arm/cortex-a7/cortex-a7.mk b/libc/arch-arm/cortex-a7/cortex-a7.mk index 9af03d9..b6af4da 100644 --- a/libc/arch-arm/cortex-a7/cortex-a7.mk +++ b/libc/arch-arm/cortex-a7/cortex-a7.mk @@ -1 +1,18 @@ -include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk +libc_bionic_src_files_arm += \ + arch-arm/cortex-a7/bionic/memset.S \ + +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/memcpy.S \ + arch-arm/cortex-a15/bionic/stpcpy.S \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/__strcat_chk.S \ + arch-arm/cortex-a15/bionic/strcmp.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/__strcpy_chk.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + +libc_bionic_src_files_arm += \ + arch-arm/generic/bionic/memcmp.S \ + +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memmove.S \ |