summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrent DeGraaf <bdegraaf@codeaurora.org>2015-09-18 16:06:28 -0400
committerLinux Build Service Account <lnxbuild@localhost>2015-10-06 03:29:29 -0600
commit4b5256a4395495f7de3224483605e169599bc5a3 (patch)
tree6d228120144bfd7352d5d0ec0d5a62681f2b0270
parent4fbfc0056b85cae636e64f98bd344ad27862da37 (diff)
downloadbionic-4b5256a4395495f7de3224483605e169599bc5a3.zip
bionic-4b5256a4395495f7de3224483605e169599bc5a3.tar.gz
bionic-4b5256a4395495f7de3224483605e169599bc5a3.tar.bz2
libc: kryo specific memory routine
Add kryo specific memcpy Change-Id: Id3af7bdbc9d621c56cd26cbc04f9ad116f228550
-rw-r--r--libc/arch-arm64/kryo/bionic/memcpy.S65
-rw-r--r--libc/arch-arm64/kryo/bionic/memcpy_base.S244
-rw-r--r--libc/arch-arm64/kryo/kryo.mk14
3 files changed, 323 insertions, 0 deletions
diff --git a/libc/arch-arm64/kryo/bionic/memcpy.S b/libc/arch-arm64/kryo/bionic/memcpy.S
new file mode 100644
index 0000000..87e1b3b
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Prototype: void *memcpy (void *dst, const void *src, size_t count).
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ENTRY(__memcpy_chk)
+ cmp x2, x3
+ b.hi __memcpy_chk_fail
+
+ // Fall through to memcpy...
+ b memcpy
+END(__memcpy_chk)
+
+ .align 6
+ENTRY(memcpy)
+ #include "memcpy_base.S"
+END(memcpy)
+
+ENTRY_PRIVATE(__memcpy_chk_fail)
+ // Preserve for accurate backtrace.
+ stp x29, x30, [sp, -16]!
+ .cfi_def_cfa_offset 16
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+
+ adrp x0, error_string
+ add x0, x0, :lo12:error_string
+ ldr x1, error_code
+ bl __fortify_chk_fail
+error_code:
+ .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
+END(__memcpy_chk_fail)
+
+ .data
+ .align 2
+error_string:
+ .string "memcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm64/kryo/bionic/memcpy_base.S b/libc/arch-arm64/kryo/bionic/memcpy_base.S
new file mode 100644
index 0000000..0096bb7
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy_base.S
@@ -0,0 +1,244 @@
+/* Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of The Linux Foundation nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef PLDOFFS
+#undef PLDOFFS
+#endif
+#define PLDOFFS (16)
+
+#ifdef PLDTHRESH
+#undef PLDTHRESH
+#endif
+#define PLDTHRESH (PLDOFFS)
+
+#ifdef BBTHRESH
+#undef BBTHRESH
+#endif
+#define BBTHRESH (2048/128)
+
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+
+#ifdef PLDSIZE
+#undef PLDSIZE
+#endif
+#define PLDSIZE (128)
+
+kryo_bb_memcpy:
+ mov x11, x0
+ cmp x2, #4
+ blo kryo_bb_lt4
+ cmp x2, #16
+ blo kryo_bb_lt16
+ cmp x2, #32
+ blo kryo_bb_16
+ cmp x2, #64
+ blo kryo_bb_copy_32_a
+ cmp x2, #128
+ blo kryo_bb_copy_64_a
+
+ // we have at least 127 bytes to achieve 128-byte alignment
+ neg x3, x1 // calculate count to get SOURCE aligned
+ ands x3, x3, #0x7F
+ b.eq kryo_bb_source_aligned // already aligned
+ // alignment fixup, small to large (favorable alignment)
+ tbz x3, #0, 1f
+ ldrb w5, [x1], #1
+ strb w5, [x0], #1
+1: tbz x3, #1, 2f
+ ldrh w6, [x1], #2
+ strh w6, [x0], #2
+2: tbz x3, #2, 3f
+ ldr w8, [x1], #4
+ str w8, [x0], #4
+3: tbz x3, #3, 4f
+ ldr x9, [x1], #8
+ str x9, [x0], #8
+4: tbz x3, #4, 5f
+ ldr q7, [x1], #16
+ str q7, [x0], #16
+5: tbz x3, #5, 55f
+ ldp q0, q1, [x1], #32
+ stp q0, q1, [x0], #32
+55: tbz x3, #6, 6f
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+6: subs x2, x2, x3 // fixup count after alignment
+ b.eq kryo_bb_exit
+ cmp x2, #128
+ blo kryo_bb_copy_64_a
+kryo_bb_source_aligned:
+ lsr x12, x2, #7
+ cmp x12, #PLDTHRESH
+ bls kryo_bb_copy_128_loop_nopld
+
+ cmp x12, #BBTHRESH
+ bls kryo_bb_prime_pump
+
+ add x14, x0, #0x400
+ add x9, x1, #(PLDOFFS*PLDSIZE)
+ sub x14, x14, x9
+ lsl x14, x14, #(21+32)
+ lsr x14, x14, #(21+32)
+ add x14, x14, #(PLDOFFS*PLDSIZE)
+ cmp x12, x14, lsr #7
+ bls kryo_bb_prime_pump
+
+ mov x9, #(PLDOFFS)
+ lsr x13, x14, #7
+ subs x9, x13, x9
+ bls kryo_bb_prime_pump
+
+ add x10, x1, x14
+ bic x10, x10, #0x7F // Round to multiple of PLDSIZE
+
+ sub x12, x12, x14, lsr #7
+ cmp x9, x12
+ sub x13, x12, x9
+ csel x12, x13, x12, LS
+ csel x9, x12, x9, HI
+ csel x12, xzr, x12, HI
+
+ prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE)]
+ prfm PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE+64)]
+kryo_bb_copy_128_loop_outer_doublepld:
+ prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)]
+ prfm PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)+64]
+ subs x9, x9, #1
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ prfm PLDL1KEEP, [x10]
+ prfm PLDL1KEEP, [x10, #64]
+ add x10, x10, #128
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_outer_doublepld
+ cmp x12, #0
+ beq kryo_bb_pop_before_nopld
+ cmp x12, #(448*1024/128)
+ bls kryo_bb_copy_128_loop_outer
+
+kryo_bb_copy_128_loop_ddr:
+ subs x12, x12, #1
+ ldr x3, [x10], #128
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_ddr
+ b kryo_bb_pop_before_nopld
+
+kryo_bb_prime_pump:
+ mov x14, #(PLDOFFS*PLDSIZE)
+ add x10, x1, #(PLDOFFS*PLDSIZE)
+ bic x10, x10, #0x7F
+ sub x12, x12, #PLDOFFS
+ prfm PLDL1KEEP, [x10, #(-1*PLDSIZE)]
+ prfm PLDL1KEEP, [x10, #(-1*PLDSIZE+64)]
+ cmp x12, #(448*1024/128)
+ bhi kryo_bb_copy_128_loop_ddr
+
+kryo_bb_copy_128_loop_outer:
+ subs x12, x12, #1
+ prfm PLDL1KEEP, [x10]
+ prfm PLDL1KEEP, [x10, #64]
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ add x10, x10, #128
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_outer
+
+kryo_bb_pop_before_nopld:
+ lsr x12, x14, #7
+kryo_bb_copy_128_loop_nopld:
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ ldp q4, q5, [x1], #32
+ ldp q6, q7, [x1], #32
+ subs x12, x12, #1
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ bne kryo_bb_copy_128_loop_nopld
+ ands x2, x2, #0x7f
+ beq kryo_bb_exit
+
+kryo_bb_copy_64_a:
+ tbz x2, #6, kryo_bb_copy_32_a
+ ldp q0, q1, [x1], #32
+ ldp q2, q3, [x1], #32
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+kryo_bb_copy_32_a:
+ tbz x2, #5, kryo_bb_16
+ ldp q0, q1, [x1], #32
+ stp q0, q1, [x0], #32
+kryo_bb_16:
+ tbz x2, #4, kryo_bb_lt16
+ ldr q7, [x1], #16
+ str q7, [x0], #16
+ ands x2, x2, #0x0f
+ beq kryo_bb_exit
+kryo_bb_lt16:
+ tbz x2, #3, kryo_bb_lt8
+ ldr x3, [x1], #8
+ str x3, [x0], #8
+kryo_bb_lt8:
+ tbz x2, #2, kryo_bb_lt4
+ ldr w3, [x1], #4
+ str w3, [x0], #4
+kryo_bb_lt4:
+ tbz x2, #1, kryo_bb_lt2
+ ldrh w3, [x1], #2
+ strh w3, [x0], #2
+kryo_bb_lt2:
+ tbz x2, #0, kryo_bb_exit
+ ldrb w3, [x1], #1
+ strb w3, [x0], #1
+kryo_bb_exit:
+ mov x0, x11
+ ret
+
diff --git a/libc/arch-arm64/kryo/kryo.mk b/libc/arch-arm64/kryo/kryo.mk
new file mode 100644
index 0000000..b5b714e
--- /dev/null
+++ b/libc/arch-arm64/kryo/kryo.mk
@@ -0,0 +1,14 @@
+libc_bionic_src_files_arm64 += \
+ arch-arm64/generic/bionic/memchr.S \
+ arch-arm64/generic/bionic/memcmp.S \
+ arch-arm64/kryo/bionic/memcpy.S \
+ arch-arm64/generic/bionic/memmove.S \
+ arch-arm64/generic/bionic/memset.S \
+ arch-arm64/generic/bionic/stpcpy.S \
+ arch-arm64/generic/bionic/strchr.S \
+ arch-arm64/generic/bionic/strcmp.S \
+ arch-arm64/generic/bionic/strcpy.S \
+ arch-arm64/generic/bionic/strlen.S \
+ arch-arm64/generic/bionic/strncmp.S \
+ arch-arm64/generic/bionic/strnlen.S \
+ arch-arm64/generic/bionic/wmemmove.S