diff options
Diffstat (limited to 'libc/arch-mips64/string/memcpy.S')
-rw-r--r-- | libc/arch-mips64/string/memcpy.S | 423 |
1 files changed, 0 insertions, 423 deletions
diff --git a/libc/arch-mips64/string/memcpy.S b/libc/arch-mips64/string/memcpy.S deleted file mode 100644 index dc91096..0000000 --- a/libc/arch-mips64/string/memcpy.S +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Copyright (c) 2009 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */
-
-/************************************************************************
- *
- * memcpy.S
- * Version: "043009"
- *
- ************************************************************************/
-
-
-/************************************************************************
- * Include files
- ************************************************************************/
-
-#include <private/bionic_asm.h>
-
-
-/*
- * This routine could be optimized for MIPS64. The current code only
- * uses MIPS32 instructions.
- *
- * NOTE(review): all pointer arithmetic below is done with addu/subu/addiu
- * and the overlap test uses "sra ...,31", i.e. 32-bit operations. Confirm
- * this is valid for the pointer width this file is built for.
- *
- * LWHI/SWHI and LWLO/SWLO map onto lwl/lwr/swl/swr according to the byte
- * order. An LWHI x,0(p) / LWLO x,3(p) pair reads one word from a source
- * address p that need not be word-aligned.
- */
-#if defined(__MIPSEB__)
-#  define LWHI  lwl             /* high part is left in big-endian */
-#  define SWHI  swl             /* high part is left in big-endian */
-#  define LWLO  lwr             /* low part is right in big-endian */
-#  define SWLO  swr             /* low part is right in big-endian */
-#endif
-
-#if defined(__MIPSEL__)
-#  define LWHI  lwr             /* high part is right in little-endian */
-#  define SWHI  swr             /* high part is right in little-endian */
-#  define LWLO  lwl             /* low part is left in little-endian */
-#  define SWLO  swl             /* low part is left in little-endian */
-#endif
-
-/*
- * void *memcpy(void *dst, const void *src, size_t n)
- *
- * In:   a0 = dst, a1 = src, a2 = n (byte count)
- * Out:  v0 = dst as passed in, on every path that goes through .Lmemcpy.
- *       On the overlap path the result is whatever memmove returns.
- * Uses: AT, v1, a3, t0-t9 as scratch; a0/a1 are advanced and a2 is reduced
- *       as the copy proceeds.
- *
- * Outline:
- *   - |dst - src| < n (buffers overlap): jump to memmove with a0-a2 untouched.
- *   - n < 8: plain byte loop (.Llast8).
- *   - src and dst have the same offset within a word: align dst with one
- *     partial word, then copy 64-byte blocks, at most one 32-byte block,
- *     whole words, and finally the 0-3 leftover bytes.
- *   - otherwise (.Lunaligned): same structure, but every source word is read
- *     with an LWHI/LWLO pair while the stores stay word-aligned.
- *
- * Source lines are prefetched with "pref 0" and destination lines with
- * "pref 30"; v1 is a flag that switches the "pref 30" hints off near the
- * end of the destination.
- */
-LEAF(memcpy,0)
-
-        .set    noreorder               # delay slots are scheduled by hand below
-        .set    noat                    # AT is used explicitly as a scratch register
-/*
- * Below we handle the case where memcpy is called with overlapping src and dst.
- * Although memcpy is not required to handle this case, some parts of Android like Skia
- * rely on such usage. We call memmove to handle such cases.
- */
-        subu    t0,a0,a1                # t0 = dst - src
-        sra     AT,t0,31                # AT = t0 shifted right arithmetically by 31 (sign mask)
-        xor     t1,t0,AT
-        subu    t0,t1,AT                # t0 = (t0 ^ AT) - AT, i.e. |dst - src|
-        sltu    AT,t0,a2                # AT = 1 when |dst - src| < n (buffers overlap)
-        beq     AT,zero,.Lmemcpy        # no overlap: continue with the copy below
-        la      t9,memmove              # (delay slot) t9 = address of memmove
-        jr      t9                      # overlap: hand a0/a1/a2 over to memmove
-        nop
-.Lmemcpy:
-        slti    AT,a2,8                 # AT = 1 when n < 8; NOTE(review): signed compare on the count
-        bne     AT,zero,.Llast8         # short copy: go straight to the byte loop
-        move    v0,a0                   # (delay slot, always executed) memcpy returns the dst pointer
-
-# Test if the src and dst are word-aligned, or can be made word-aligned
-        xor     t8,a1,a0
-        andi    t8,t8,0x3               # t8 is a0/a1 word-displacement
-
-        bne     t8,zero,.Lunaligned     # different offsets within a word: unaligned path
-        negu    a3,a0                   # (delay slot) a3 = -dst, masked to 0..3 below and in .Lunaligned
-
-        andi    a3,a3,0x3               # we need to copy a3 bytes to make a0/a1 aligned
-        beq     a3,zero,.Lchk16w        # when a3=0 then the dst (a0) is word-aligned
-        subu    a2,a2,a3                # (delay slot) now a2 is the remaining bytes count
-
-        LWHI    t8,0(a1)                # partial word: the a3 bytes up to the next word boundary
-        addu    a1,a1,a3
-        SWHI    t8,0(a0)
-        addu    a0,a0,a3
-
-# Now the dst/src are mutually word-aligned with word-aligned addresses
-.Lchk16w:
-        andi    t8,a2,0x3f              # any whole 64-byte chunks?
-                                        # t8 is the byte count after 64-byte chunks
-
-        beq     a2,t8,.Lchk8w           # if a2==t8, no 64-byte chunks
-                                        # There will be at most 1 32-byte chunk after it
-        subu    a3,a2,t8                # (delay slot) subtract from a2 the remainder
-                                        # Here a3 counts bytes in 16w chunks
-        addu    a3,a0,a3                # Now a3 is the final dst after 64-byte chunks
-
-        addu    t0,a0,a2                # t0 is the "past the end" address
-
-# When in the loop we exercise "pref 30,x(a0)", the a0+x should not be past
-# the "t0-32" address
-# This means: for x=128 the last "safe" a0 address is "t0-160"
-# Alternatively, for x=64 the last "safe" a0 address is "t0-96"
-# In the current version we will use "pref 30,128(a0)", so "t0-160" is the limit
-        subu    t9,t0,160               # t9 is the "last safe pref 30,128(a0)" address
-
-        pref    0,0(a1)                 # bring the first line of src, addr 0
-        pref    0,32(a1)                # bring the second line of src, addr 32
-        pref    0,64(a1)                # bring the third line of src, addr 64
-        pref    30,32(a0)               # safe, as we have at least 64 bytes ahead
-# In case the a0 > t9 don't use "pref 30" at all
-        sgtu    v1,a0,t9                # v1 = 1 when a0 is past the last safe address
-        bgtz    v1,.Lloop16w            # skip "pref 30,64(a0)" for too short arrays
-        nop
-# otherwise, start with using pref30
-        pref    30,64(a0)
-# Each iteration copies 64 bytes as two groups of 8 word loads + 8 word stores.
-.Lloop16w:
-        pref    0,96(a1)
-        lw      t0,0(a1)
-        bgtz    v1,.Lskip_pref30_96     # skip "pref 30,96(a0)"
-        lw      t1,4(a1)                # (delay slot) executed whether or not the branch is taken
-        pref    30,96(a0)               # continue setting up the dest, addr 96
-.Lskip_pref30_96:
-        lw      t2,8(a1)
-        lw      t3,12(a1)
-        lw      t4,16(a1)
-        lw      t5,20(a1)
-        lw      t6,24(a1)
-        lw      t7,28(a1)
-        pref    0,128(a1)               # bring the next lines of src, addr 128
-
-        sw      t0,0(a0)
-        sw      t1,4(a0)
-        sw      t2,8(a0)
-        sw      t3,12(a0)
-        sw      t4,16(a0)
-        sw      t5,20(a0)
-        sw      t6,24(a0)
-        sw      t7,28(a0)
-
-        lw      t0,32(a1)
-        bgtz    v1,.Lskip_pref30_128    # skip "pref 30,128(a0)"
-        lw      t1,36(a1)               # (delay slot) executed whether or not the branch is taken
-        pref    30,128(a0)              # continue setting up the dest, addr 128
-.Lskip_pref30_128:
-        lw      t2,40(a1)
-        lw      t3,44(a1)
-        lw      t4,48(a1)
-        lw      t5,52(a1)
-        lw      t6,56(a1)
-        lw      t7,60(a1)
-        pref    0, 160(a1)              # bring the next lines of src, addr 160
-
-        sw      t0,32(a0)
-        sw      t1,36(a0)
-        sw      t2,40(a0)
-        sw      t3,44(a0)
-        sw      t4,48(a0)
-        sw      t5,52(a0)
-        sw      t6,56(a0)
-        sw      t7,60(a0)
-
-        addiu   a0,a0,64                # adding 64 to dest
-        sgtu    v1,a0,t9                # refresh the "no more pref 30" flag for the next pass
-        bne     a0,a3,.Lloop16w         # loop until dst reaches the end of the 64-byte chunks
-        addiu   a1,a1,64                # (delay slot) adding 64 to src
-        move    a2,t8                   # a2 = bytes left after the 64-byte chunks
-
-# Here we have src and dest word-aligned but less than 64-bytes to go
-
-.Lchk8w:
-        pref    0, 0x0(a1)
-        andi    t8,a2,0x1f              # is there a 32-byte chunk?
-                                        # the t8 is the remainder count past 32-bytes
-        beq     a2,t8,.Lchk1w           # when a2=t8, no 32-byte chunk
-        nop
-
-        lw      t0,0(a1)
-        lw      t1,4(a1)
-        lw      t2,8(a1)
-        lw      t3,12(a1)
-        lw      t4,16(a1)
-        lw      t5,20(a1)
-        lw      t6,24(a1)
-        lw      t7,28(a1)
-        addiu   a1,a1,32
-
-        sw      t0,0(a0)
-        sw      t1,4(a0)
-        sw      t2,8(a0)
-        sw      t3,12(a0)
-        sw      t4,16(a0)
-        sw      t5,20(a0)
-        sw      t6,24(a0)
-        sw      t7,28(a0)
-        addiu   a0,a0,32
-
-.Lchk1w:
-        andi    a2,t8,0x3               # now a2 is the remainder past 1w chunks
-        beq     a2,t8,.Llast8           # no whole word left: only tail bytes remain
-        subu    a3,t8,a2                # (delay slot) a3 is count of bytes in 1w chunks
-        addu    a3,a0,a3                # now a3 is the dst address past the 1w chunks
-
-# copying in words (4-byte chunks)
-.LwordCopy_loop:
-        lw      t3,0(a1)                # the first t3 may be equal t0 ... optimize?
-        addiu   a1,a1,4
-        addiu   a0,a0,4
-        bne     a0,a3,.LwordCopy_loop
-        sw      t3,-4(a0)               # (delay slot) store to the word a0 pointed at before the increment
-
-# For the last (<8) bytes
-.Llast8:
-        blez    a2,.Lleave              # nothing left to copy
-        addu    a3,a0,a2                # (delay slot) a3 is the dst end address (one past the last byte)
-.Llast8loop:
-        lb      v1,0(a1)
-        addiu   a1,a1,1
-        addiu   a0,a0,1
-        bne     a0,a3,.Llast8loop
-        sb      v1,-1(a0)               # (delay slot) store to the byte a0 pointed at before the increment
-
-.Lleave:
-        j       ra                      # return; v0 was set to dst at .Lmemcpy
-        nop
-
-#
-# UNALIGNED case
-#
-
-.Lunaligned:
-        # got here with a3="negu a0"
-        andi    a3,a3,0x3               # test if the a0 is word aligned
-        beqz    a3,.Lua_chk16w
-        subu    a2,a2,a3                # (delay slot) bytes left after initial a3 bytes
-
-        LWHI    v1,0(a1)                # read one full word from the unaligned src ...
-        LWLO    v1,3(a1)
-        addu    a1,a1,a3                # a3 may be here 1, 2 or 3
-        SWHI    v1,0(a0)                # ... and store only the part up to the next dst word boundary
-        addu    a0,a0,a3                # below the dst will be word aligned (NOTE1)
-
-.Lua_chk16w:
-        andi    t8,a2,0x3f              # any whole 64-byte chunks?
-                                        # t8 is the byte count after 64-byte chunks
-        beq     a2,t8,.Lua_chk8w        # if a2==t8, no 64-byte chunks
-                                        # There will be at most 1 32-byte chunk after it
-        subu    a3,a2,t8                # (delay slot) subtract from a2 the remainder
-                                        # Here a3 counts bytes in 16w chunks
-        addu    a3,a0,a3                # Now a3 is the final dst after 64-byte chunks
-
-        addu    t0,a0,a2                # t0 is the "past the end" address
-
-        subu    t9,t0,160               # t9 is the "last safe pref 30,128(a0)" address
-
-        pref    0,0(a1)                 # bring the first line of src, addr 0
-        pref    0,32(a1)                # bring the second line of src, addr 32
-        pref    0,64(a1)                # bring the third line of src, addr 64
-        pref    30,32(a0)               # safe, as we have at least 64 bytes ahead
-# In case the a0 > t9 don't use "pref 30" at all
-        sgtu    v1,a0,t9                # v1 = 1 when a0 is past the last safe address
-        bgtz    v1,.Lua_loop16w         # skip "pref 30,64(a0)" for too short arrays
-        nop
-# otherwise, start with using pref30
-        pref    30,64(a0)
-# Same 64-byte iteration as .Lloop16w, with each source word read by an LWHI/LWLO pair.
-.Lua_loop16w:
-        pref    0,96(a1)
-        LWHI    t0,0(a1)
-        LWLO    t0,3(a1)
-        LWHI    t1,4(a1)
-        bgtz    v1,.Lua_skip_pref30_96  # skip "pref 30,96(a0)"
-        LWLO    t1,7(a1)                # (delay slot) executed whether or not the branch is taken
-        pref    30,96(a0)               # continue setting up the dest, addr 96
-.Lua_skip_pref30_96:
-        LWHI    t2,8(a1)
-        LWLO    t2,11(a1)
-        LWHI    t3,12(a1)
-        LWLO    t3,15(a1)
-        LWHI    t4,16(a1)
-        LWLO    t4,19(a1)
-        LWHI    t5,20(a1)
-        LWLO    t5,23(a1)
-        LWHI    t6,24(a1)
-        LWLO    t6,27(a1)
-        LWHI    t7,28(a1)
-        LWLO    t7,31(a1)
-        pref    0,128(a1)               # bring the next lines of src, addr 128
-
-        sw      t0,0(a0)
-        sw      t1,4(a0)
-        sw      t2,8(a0)
-        sw      t3,12(a0)
-        sw      t4,16(a0)
-        sw      t5,20(a0)
-        sw      t6,24(a0)
-        sw      t7,28(a0)
-
-        LWHI    t0,32(a1)
-        LWLO    t0,35(a1)
-        LWHI    t1,36(a1)
-        bgtz    v1,.Lua_skip_pref30_128 # skip "pref 30,128(a0)"
-        LWLO    t1,39(a1)               # (delay slot) executed whether or not the branch is taken
-        pref    30,128(a0)              # continue setting up the dest, addr 128
-.Lua_skip_pref30_128:
-        LWHI    t2,40(a1)
-        LWLO    t2,43(a1)
-        LWHI    t3,44(a1)
-        LWLO    t3,47(a1)
-        LWHI    t4,48(a1)
-        LWLO    t4,51(a1)
-        LWHI    t5,52(a1)
-        LWLO    t5,55(a1)
-        LWHI    t6,56(a1)
-        LWLO    t6,59(a1)
-        LWHI    t7,60(a1)
-        LWLO    t7,63(a1)
-        pref    0, 160(a1)              # bring the next lines of src, addr 160
-
-        sw      t0,32(a0)
-        sw      t1,36(a0)
-        sw      t2,40(a0)
-        sw      t3,44(a0)
-        sw      t4,48(a0)
-        sw      t5,52(a0)
-        sw      t6,56(a0)
-        sw      t7,60(a0)
-
-        addiu   a0,a0,64                # adding 64 to dest
-        sgtu    v1,a0,t9                # refresh the "no more pref 30" flag for the next pass
-        bne     a0,a3,.Lua_loop16w      # loop until dst reaches the end of the 64-byte chunks
-        addiu   a1,a1,64                # (delay slot) adding 64 to src
-        move    a2,t8                   # a2 = bytes left after the 64-byte chunks
-
-# Here the dest is word-aligned (the src is not) and less than 64 bytes remain
-
-.Lua_chk8w:
-        pref    0, 0x0(a1)
-        andi    t8,a2,0x1f              # is there a 32-byte chunk?
-                                        # the t8 is the remainder count
-        beq     a2,t8,.Lua_chk1w        # when a2=t8, no 32-byte chunk
-        nop
-
-        LWHI    t0,0(a1)
-        LWLO    t0,3(a1)
-        LWHI    t1,4(a1)
-        LWLO    t1,7(a1)
-        LWHI    t2,8(a1)
-        LWLO    t2,11(a1)
-        LWHI    t3,12(a1)
-        LWLO    t3,15(a1)
-        LWHI    t4,16(a1)
-        LWLO    t4,19(a1)
-        LWHI    t5,20(a1)
-        LWLO    t5,23(a1)
-        LWHI    t6,24(a1)
-        LWLO    t6,27(a1)
-        LWHI    t7,28(a1)
-        LWLO    t7,31(a1)
-        addiu   a1,a1,32
-
-        sw      t0,0(a0)
-        sw      t1,4(a0)
-        sw      t2,8(a0)
-        sw      t3,12(a0)
-        sw      t4,16(a0)
-        sw      t5,20(a0)
-        sw      t6,24(a0)
-        sw      t7,28(a0)
-        addiu   a0,a0,32
-
-.Lua_chk1w:
-        andi    a2,t8,0x3               # now a2 is the remainder past 1w chunks
-        beq     a2,t8,.Lua_smallCopy    # no whole word left: only tail bytes remain
-        subu    a3,t8,a2                # (delay slot) a3 is count of bytes in 1w chunks
-        addu    a3,a0,a3                # now a3 is the dst address past the 1w chunks
-
-# copying in words (4-byte chunks)
-.Lua_wordCopy_loop:
-        LWHI    v1,0(a1)
-        LWLO    v1,3(a1)
-        addiu   a1,a1,4
-        addiu   a0,a0,4                 # note: dst=a0 is word aligned here, see NOTE1
-        bne     a0,a3,.Lua_wordCopy_loop
-        sw      v1,-4(a0)               # (delay slot) store to the word a0 pointed at before the increment
-
-# Now less than 4 bytes (value in a2) left to copy
-.Lua_smallCopy:
-        beqz    a2,.Lleave              # nothing left to copy
-        addu    a3,a0,a2                # (delay slot) a3 is the dst end address (one past the last byte)
-.Lua_smallCopy_loop:
-        lb      v1,0(a1)
-        addiu   a1,a1,1
-        addiu   a0,a0,1
-        bne     a0,a3,.Lua_smallCopy_loop
-        sb      v1,-1(a0)               # (delay slot) store to the byte a0 pointed at before the increment
-
-        j       ra                      # return; v0 was set to dst at .Lmemcpy
-        nop
-
-        .set    at                      # back to the assembler modes switched off at entry
-        .set    reorder
-
-END(memcpy)
-
-
-/************************************************************************
- * Implementation : Static functions
- ************************************************************************/ |