author     Steve Kondik <shade@chemlab.org>  2012-11-18 19:44:25 -0800
committer  Steve Kondik <shade@chemlab.org>  2012-11-18 19:44:25 -0800
commit     5015fe9bf8869e794cfd972189876e5ff67a3a77 (patch)
tree       ac5fbd6139872dbaabf3c3fa01d1ad3af441d72f /libc/arch-mips/string/memcpy.S
parent     7f687eb12e9977215416469c3edfc9f464a5883c (diff)
parent     f29c214d6ad97f9a5348407cc66a58aec2228ca9 (diff)
Merge branch 'jb-mr1-release' of https://android.googlesource.com/platform/bionic into mr1
Conflicts:
	libc/Android.mk
	libc/bionic/system_properties.c
	libc/kernel/common/linux/msm_mdp.h
	libc/tools/gensyscalls.py
	libm/Android.mk
	linker/linker.c

Change-Id: I11944300d7fcf2fd9dc587d8c7a937bf5366bcc0
Diffstat (limited to 'libc/arch-mips/string/memcpy.S')
-rw-r--r--  libc/arch-mips/string/memcpy.S  423
1 file changed, 423 insertions, 0 deletions
diff --git a/libc/arch-mips/string/memcpy.S b/libc/arch-mips/string/memcpy.S
new file mode 100644
index 0000000..aabdfcf
--- /dev/null
+++ b/libc/arch-mips/string/memcpy.S
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2009
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/************************************************************************
+ *
+ * memcpy.S
+ * Version: "043009"
+ *
+ ************************************************************************/
+
+
+/************************************************************************
+ * Include files
+ ************************************************************************/
+
+#include "machine/asm.h"
+
+
+/*
+ * This routine could be optimized for MIPS64. The current code only
+ * uses MIPS32 instructions.
+ */
+#if defined(__MIPSEB__)
+# define LWHI lwl /* high part is left in big-endian */
+# define SWHI swl /* high part is left in big-endian */
+# define LWLO lwr /* low part is right in big-endian */
+# define SWLO swr /* low part is right in big-endian */
+#endif
+
+#if defined(__MIPSEL__)
+# define LWHI lwr /* high part is right in little-endian */
+# define SWHI swr /* high part is right in little-endian */
+# define LWLO lwl /* low part is left in little-endian */
+# define SWLO swl /* low part is left in little-endian */
+#endif
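+
+/*
+ * With these macros the pair
+ *     LWHI t0,0(a1)
+ *     LWLO t0,3(a1)
+ * reads the four bytes at a1..a1+3 into t0 regardless of the
+ * alignment of a1: lwl/lwr each fill only the byte lanes their
+ * address selects, and together they cover the whole word on
+ * either endianness.
+ */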
+
+LEAF(memcpy,0)
+
+ .set noreorder
+ .set noat
+/*
+ * Below we handle the case where memcpy is called with overlapping src and dst.
+ * Although memcpy is not required to handle this case, some parts of Android like Skia
+ * rely on such usage. We call memmove to handle such cases.
+ */
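+# The sequence below computes |a0 - a1| without branching:
+#   t0 = a0 - a1;  AT = t0 >> 31 (0 or -1, the sign mask);
+#   (t0 ^ AT) - AT = |t0|.
+# In C terms (an illustrative sketch, not part of this file):
+#   if ((unsigned)(dst - src < 0 ? src - dst : dst - src) < n)
+#       return memmove(dst, src, n);
+# If |dst - src| < a2 the regions overlap, so we jump (rather than
+# call) to memmove, which then returns directly to memcpy's caller.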
+ subu t0,a0,a1
+ sra AT,t0,31
+ xor t1,t0,AT
+ subu t0,t1,AT
+ sltu AT,t0,a2
+ beq AT,zero,.Lmemcpy
+ la t9,memmove
+ jr t9
+ nop
+.Lmemcpy:
+ slti AT,a2,8
+ bne AT,zero,.Llast8
+ move v0,a0 # memcpy returns the dst pointer
+
+# Test if the src and dst are word-aligned, or can be made word-aligned
+ xor t8,a1,a0
+ andi t8,t8,0x3 # t8 is a0/a1 word-displacement
+
+ bne t8,zero,.Lunaligned
+ negu a3,a0
+
+ andi a3,a3,0x3 # we need to copy a3 bytes to make a0/a1 aligned
+ beq a3,zero,.Lchk16w # when a3=0 then the dst (a0) is word-aligned
+ subu a2,a2,a3 # now a2 is the remaining bytes count
+
+ LWHI t8,0(a1)
+ addu a1,a1,a3
+ SWHI t8,0(a0)
+ addu a0,a0,a3
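+# (LWHI/SWHI above move just the a3 leading bytes; a0 and a1 have
+# equal low bits here, checked above, so both pointers are now
+# word-aligned)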
+
+# Now the dst/src are mutually word-aligned with word-aligned addresses
+.Lchk16w:
+ andi t8,a2,0x3f # any whole 64-byte chunks?
+ # t8 is the byte count after 64-byte chunks
+
+ beq a2,t8,.Lchk8w # if a2==t8, no 64-byte chunks
+ # There will be at most 1 32-byte chunk after it
+ subu a3,a2,t8 # subtract the remainder from a2
+ # Here a3 counts bytes in 16w chunks
+ addu a3,a0,a3 # Now a3 is the final dst after 64-byte chunks
+
+ addu t0,a0,a2 # t0 is the "past the end" address
+
+# Whenever the loop issues "pref 30,x(a0)", a0+x must not lie past the
+# "t0-32" address, i.e. the prefetched 32-byte line must fall wholly
+# inside the destination buffer.
+# This means: for x=128 the last "safe" a0 address is "t0-160";
+# alternatively, for x=64 the last "safe" a0 address is "t0-96".
+# This version uses "pref 30,128(a0)", so "t0-160" is the limit.
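+# ("pref 30" is PrepareForStore: it may allocate and zero-fill the
+# target cache line without fetching it from memory, so it must never
+# touch a line the copy will not completely overwrite.)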
+ subu t9,t0,160 # t9 is the "last safe pref 30,128(a0)" address
+
+ pref 0,0(a1) # bring the first line of src, addr 0
+ pref 0,32(a1) # bring the second line of src, addr 32
+ pref 0,64(a1) # bring the third line of src, addr 64
+ pref 30,32(a0) # safe, as we have at least 64 bytes ahead
+# If a0 > t9, don't use "pref 30" at all
+ sgtu v1,a0,t9
+ bgtz v1,.Lloop16w # skip "pref 30,64(a0)" for too short arrays
+ nop
+# otherwise, start with using pref30
+ pref 30,64(a0)
+.Lloop16w:
+ pref 0,96(a1)
+ lw t0,0(a1)
+ bgtz v1,.Lskip_pref30_96 # skip "pref 30,96(a0)"
+ lw t1,4(a1)
+ pref 30,96(a0) # continue setting up the dest, addr 96
+.Lskip_pref30_96:
+ lw t2,8(a1)
+ lw t3,12(a1)
+ lw t4,16(a1)
+ lw t5,20(a1)
+ lw t6,24(a1)
+ lw t7,28(a1)
+ pref 0,128(a1) # bring the next lines of src, addr 128
+
+ sw t0,0(a0)
+ sw t1,4(a0)
+ sw t2,8(a0)
+ sw t3,12(a0)
+ sw t4,16(a0)
+ sw t5,20(a0)
+ sw t6,24(a0)
+ sw t7,28(a0)
+
+ lw t0,32(a1)
+ bgtz v1,.Lskip_pref30_128 # skip "pref 30,128(a0)"
+ lw t1,36(a1)
+ pref 30,128(a0) # continue setting up the dest, addr 128
+.Lskip_pref30_128:
+ lw t2,40(a1)
+ lw t3,44(a1)
+ lw t4,48(a1)
+ lw t5,52(a1)
+ lw t6,56(a1)
+ lw t7,60(a1)
+ pref 0, 160(a1) # bring the next lines of src, addr 160
+
+ sw t0,32(a0)
+ sw t1,36(a0)
+ sw t2,40(a0)
+ sw t3,44(a0)
+ sw t4,48(a0)
+ sw t5,52(a0)
+ sw t6,56(a0)
+ sw t7,60(a0)
+
+ addiu a0,a0,64 # adding 64 to dest
+ sgtu v1,a0,t9
+ bne a0,a3,.Lloop16w
+ addiu a1,a1,64 # adding 64 to src
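+ # (the addiu above executes in the bne's delay slot, so a1
+ # advances on every iteration, including the last)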
+ move a2,t8
+
+# Here we have src and dest word-aligned but less than 64 bytes to go
+
+.Lchk8w:
+ pref 0, 0x0(a1)
+ andi t8,a2,0x1f # is there a 32-byte chunk?
+ # t8 is the remainder count past 32 bytes
+ beq a2,t8,.Lchk1w # when a2=t8, no 32-byte chunk
+ nop
+
+ lw t0,0(a1)
+ lw t1,4(a1)
+ lw t2,8(a1)
+ lw t3,12(a1)
+ lw t4,16(a1)
+ lw t5,20(a1)
+ lw t6,24(a1)
+ lw t7,28(a1)
+ addiu a1,a1,32
+
+ sw t0,0(a0)
+ sw t1,4(a0)
+ sw t2,8(a0)
+ sw t3,12(a0)
+ sw t4,16(a0)
+ sw t5,20(a0)
+ sw t6,24(a0)
+ sw t7,28(a0)
+ addiu a0,a0,32
+
+.Lchk1w:
+ andi a2,t8,0x3 # now a2 is the remainder past 1w chunks
+ beq a2,t8,.Llast8
+ subu a3,t8,a2 # a3 is count of bytes in 1w chunks
+ addu a3,a0,a3 # now a3 is the dst address past the 1w chunks
+
+# copying in words (4-byte chunks)
+.LwordCopy_loop:
+ lw t3,0(a1) # the first t3 may be equal to t0 ... optimize?
+ addiu a1,a1,4
+ addiu a0,a0,4
+ bne a0,a3,.LwordCopy_loop
+ sw t3,-4(a0)
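+ # (the sw above sits in the bne's delay slot; a0 was already
+ # incremented, hence the -4 offset)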
+
+# For the last (<8) bytes
+.Llast8:
+ blez a2,.Lleave
+ addu a3,a0,a2 # a3 is the last dst address
+.Llast8loop:
+ lb v1,0(a1)
+ addiu a1,a1,1
+ addiu a0,a0,1
+ bne a0,a3,.Llast8loop
+ sb v1,-1(a0)
+
+.Lleave:
+ j ra
+ nop
+
+#
+# UNALIGNED case
+#
+
+.Lunaligned:
+ # got here with a3="negu a0"
+ andi a3,a3,0x3 # a3 = bytes needed to word-align a0 (0 if aligned)
+ beqz a3,.Lua_chk16w
+ subu a2,a2,a3 # bytes left after initial a3 bytes
+
+ LWHI v1,0(a1)
+ LWLO v1,3(a1)
+ addu a1,a1,a3 # a3 may be here 1, 2 or 3
+ SWHI v1,0(a0)
+ addu a0,a0,a3 # below the dst will be word aligned (NOTE1)
+
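+# From here the flow mirrors the aligned path above; the only
+# difference is that each 32-bit load becomes an LWHI/LWLO pair,
+# while the stores remain plain sw because dst is word-aligned (NOTE1).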
+.Lua_chk16w:
+ andi t8,a2,0x3f # any whole 64-byte chunks?
+ # t8 is the byte count after 64-byte chunks
+ beq a2,t8,.Lua_chk8w # if a2==t8, no 64-byte chunks
+ # There will be at most 1 32-byte chunk after it
+ subu a3,a2,t8 # subtract the remainder from a2
+ # Here a3 counts bytes in 16w chunks
+ addu a3,a0,a3 # Now a3 is the final dst after 64-byte chunks
+
+ addu t0,a0,a2 # t0 is the "past the end" address
+
+ subu t9,t0,160 # t9 is the "last safe pref 30,128(a0)" address
+
+ pref 0,0(a1) # bring the first line of src, addr 0
+ pref 0,32(a1) # bring the second line of src, addr 32
+ pref 0,64(a1) # bring the third line of src, addr 64
+ pref 30,32(a0) # safe, as we have at least 64 bytes ahead
+# If a0 > t9, don't use "pref 30" at all
+ sgtu v1,a0,t9
+ bgtz v1,.Lua_loop16w # skip "pref 30,64(a0)" for too short arrays
+ nop
+# otherwise, start with using pref30
+ pref 30,64(a0)
+.Lua_loop16w:
+ pref 0,96(a1)
+ LWHI t0,0(a1)
+ LWLO t0,3(a1)
+ LWHI t1,4(a1)
+ bgtz v1,.Lua_skip_pref30_96
+ LWLO t1,7(a1)
+ pref 30,96(a0) # continue setting up the dest, addr 96
+.Lua_skip_pref30_96:
+ LWHI t2,8(a1)
+ LWLO t2,11(a1)
+ LWHI t3,12(a1)
+ LWLO t3,15(a1)
+ LWHI t4,16(a1)
+ LWLO t4,19(a1)
+ LWHI t5,20(a1)
+ LWLO t5,23(a1)
+ LWHI t6,24(a1)
+ LWLO t6,27(a1)
+ LWHI t7,28(a1)
+ LWLO t7,31(a1)
+ pref 0,128(a1) # bring the next lines of src, addr 128
+
+ sw t0,0(a0)
+ sw t1,4(a0)
+ sw t2,8(a0)
+ sw t3,12(a0)
+ sw t4,16(a0)
+ sw t5,20(a0)
+ sw t6,24(a0)
+ sw t7,28(a0)
+
+ LWHI t0,32(a1)
+ LWLO t0,35(a1)
+ LWHI t1,36(a1)
+ bgtz v1,.Lua_skip_pref30_128
+ LWLO t1,39(a1)
+ pref 30,128(a0) # continue setting up the dest, addr 128
+.Lua_skip_pref30_128:
+ LWHI t2,40(a1)
+ LWLO t2,43(a1)
+ LWHI t3,44(a1)
+ LWLO t3,47(a1)
+ LWHI t4,48(a1)
+ LWLO t4,51(a1)
+ LWHI t5,52(a1)
+ LWLO t5,55(a1)
+ LWHI t6,56(a1)
+ LWLO t6,59(a1)
+ LWHI t7,60(a1)
+ LWLO t7,63(a1)
+ pref 0, 160(a1) # bring the next lines of src, addr 160
+
+ sw t0,32(a0)
+ sw t1,36(a0)
+ sw t2,40(a0)
+ sw t3,44(a0)
+ sw t4,48(a0)
+ sw t5,52(a0)
+ sw t6,56(a0)
+ sw t7,60(a0)
+
+ addiu a0,a0,64 # adding 64 to dest
+ sgtu v1,a0,t9
+ bne a0,a3,.Lua_loop16w
+ addiu a1,a1,64 # adding 64 to src
+ move a2,t8
+
+# Here dst is word-aligned (src may not be) and less than 64 bytes remain
+
+.Lua_chk8w:
+ pref 0, 0x0(a1)
+ andi t8,a2,0x1f # is there a 32-byte chunk?
+ # t8 is the remainder count past 32 bytes
+ beq a2,t8,.Lua_chk1w # when a2=t8, no 32-byte chunk
+ nop
+
+ LWHI t0,0(a1)
+ LWLO t0,3(a1)
+ LWHI t1,4(a1)
+ LWLO t1,7(a1)
+ LWHI t2,8(a1)
+ LWLO t2,11(a1)
+ LWHI t3,12(a1)
+ LWLO t3,15(a1)
+ LWHI t4,16(a1)
+ LWLO t4,19(a1)
+ LWHI t5,20(a1)
+ LWLO t5,23(a1)
+ LWHI t6,24(a1)
+ LWLO t6,27(a1)
+ LWHI t7,28(a1)
+ LWLO t7,31(a1)
+ addiu a1,a1,32
+
+ sw t0,0(a0)
+ sw t1,4(a0)
+ sw t2,8(a0)
+ sw t3,12(a0)
+ sw t4,16(a0)
+ sw t5,20(a0)
+ sw t6,24(a0)
+ sw t7,28(a0)
+ addiu a0,a0,32
+
+.Lua_chk1w:
+ andi a2,t8,0x3 # now a2 is the remainder past 1w chunks
+ beq a2,t8,.Lua_smallCopy
+ subu a3,t8,a2 # a3 is count of bytes in 1w chunks
+ addu a3,a0,a3 # now a3 is the dst address past the 1w chunks
+
+# copying in words (4-byte chunks)
+.Lua_wordCopy_loop:
+ LWHI v1,0(a1)
+ LWLO v1,3(a1)
+ addiu a1,a1,4
+ addiu a0,a0,4 # note: dst=a0 is word aligned here, see NOTE1
+ bne a0,a3,.Lua_wordCopy_loop
+ sw v1,-4(a0)
+
+# Now less than 4 bytes (value in a2) left to copy
+.Lua_smallCopy:
+ beqz a2,.Lleave
+ addu a3,a0,a2 # a3 is the last dst address
+.Lua_smallCopy_loop:
+ lb v1,0(a1)
+ addiu a1,a1,1
+ addiu a0,a0,1
+ bne a0,a3,.Lua_smallCopy_loop
+ sb v1,-1(a0)
+
+ j ra
+ nop
+
+ .set at
+ .set reorder
+
+END(memcpy)
+
+
+/************************************************************************
+ * Implementation : Static functions
+ ************************************************************************/