diff options
Diffstat (limited to 'libc/arch-mips/string/strcmp.S')
-rw-r--r-- | libc/arch-mips/string/strcmp.S | 260 |
1 files changed, 260 insertions, 0 deletions
diff --git a/libc/arch-mips/string/strcmp.S b/libc/arch-mips/string/strcmp.S new file mode 100644 index 0000000..2b67f5a --- /dev/null +++ b/libc/arch-mips/string/strcmp.S @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2014 + * Imagination Technologies Limited. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#ifdef __ANDROID__
+# include <private/bionic_asm.h>
+#elif _LIBC
+# include <sysdep.h>
+# include <regdef.h>
+# include <sys/asm.h>
+#elif _COMPILING_NEWLIB
+# include "machine/asm.h"
+# include "machine/regdef.h"
+#else
+# include <regdef.h>
+# include <sys/asm.h>
+#endif
+
+/* Technically strcmp should not read past the end of the strings being
+   compared.  We will read a full word that may contain excess bytes beyond
+   the NUL string terminator but unless ENABLE_READAHEAD is set, we will not
+   read the next word after the end of string.  Setting ENABLE_READAHEAD will
+   improve performance but is technically illegal based on the definition of
+   strcmp.
+
+   DELAY_READ follows each word comparison in the unrolled loop below.  As a
+   'nop' it fills the delay slot of the "end of string" branch; when it is
+   empty the next word's first load falls into that delay slot instead.  */
+#ifdef ENABLE_READAHEAD
+# define DELAY_READ
+#else
+# define DELAY_READ nop
+#endif
+
+/* Testing on a little endian machine showed using CLZ was a
+   performance loss, so we are not turning it on by default.  */
+#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
+# define USE_CLZ
+#endif
+
+/* Some asm.h files do not have the L macro definition.  */
+#ifndef L
+# if _MIPS_SIM == _ABIO32
+#  define L(label) $L ## label
+# else
+#  define L(label) .L ## label
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
+#ifndef PTR_ADDIU
+# if _MIPS_SIM == _ABIO32
+#  define PTR_ADDIU addiu
+# else
+#  define PTR_ADDIU daddiu
+# endif
+#endif
+
+/* Allow the routine to be named something else if desired.  */
+#ifndef STRCMP_NAME
+# define STRCMP_NAME strcmp
+#endif
+
+/* int STRCMP_NAME (const char *s1, const char *s2)
+
+   In:    a0 = s1, a1 = s2.
+   Out:   v0 = 0 when the strings are equal, otherwise the difference
+          between the first pair of bytes that differ, each byte taken as
+          an unsigned value (s1 byte minus s2 byte).
+   Uses:  v0, v1, t0, t1, t8, t9; a0 and a1 are advanced.  The stack is
+          not touched.
+
+   When both pointers are 4-byte aligned the strings are compared a word
+   at a time (eight words per loop iteration); otherwise they are compared
+   a byte at a time (eight bytes per loop iteration).
+
+   The body is assembled with ".set noreorder": the instruction written
+   immediately after every branch or jump is that branch's delay slot and
+   is executed whether or not the branch is taken.  The instruction order
+   below relies on this throughout.  */
+#ifdef __ANDROID__
+LEAF(STRCMP_NAME, 0)
+#else
+LEAF(STRCMP_NAME)
+#endif
+	.set nomips16
+	.set noreorder
+
+	/* If either pointer has one of its two low bits set, use the byte
+	   loop.  */
+	or t0, a0, a1
+	andi t0,0x3
+	bne t0, zero, L(byteloop)
+
+/* Both strings are 4-byte aligned at this point.  The first 'lui' below is
+   the delay slot of the 'bne' above, so it also executes (harmlessly) on
+   the way to the byte loop.
+
+   t8 = 0x01010101 and t9 = 0x7f7f7f7f are the constants of the
+   "does this word contain a zero byte" test used by STRCMP32.  */
+
+	lui t8, 0x0101
+	ori t8, t8, 0x0101
+	lui t9, 0x7f7f
+	ori t9, 0x7f7f
+
+/* Compare one word of each string at byte offset OFFSET.
+
+   v0 = word from s1, v1 = word from s2.
+   - If the words differ, branch to L(worddiff), which finds the byte that
+     decides the result.  The 'nor' is that branch's delay slot.
+   - Otherwise t0 = (v0 - 0x01010101) & ~v0 & 0x80808080, which is non-zero
+     exactly when v0 contains a zero byte.  The words are equal, so a zero
+     byte means both strings end here: branch to L(returnzero).
+
+   The macro ends on a branch; whatever follows the macro invocation is
+   that branch's delay slot.  */
+#define STRCMP32(OFFSET) \
+	lw v0, OFFSET(a0); \
+	lw v1, OFFSET(a1); \
+	subu t0, v0, t8; \
+	bne v0, v1, L(worddiff); \
+	nor t1, v0, t9; \
+	and t0, t0, t1; \
+	bne t0, zero, L(returnzero)
+
+/* Word loop, unrolled eight times (32 bytes per iteration).  */
+L(wordloop):
+	STRCMP32(0)
+	DELAY_READ
+	STRCMP32(4)
+	DELAY_READ
+	STRCMP32(8)
+	DELAY_READ
+	STRCMP32(12)
+	DELAY_READ
+	STRCMP32(16)
+	DELAY_READ
+	STRCMP32(20)
+	DELAY_READ
+	STRCMP32(24)
+	DELAY_READ
+	STRCMP32(28)
+	/* Delay slot of the last STRCMP32 branch: advance s1.  */
+	PTR_ADDIU a0, a0, 32
+	b L(wordloop)
+	/* Delay slot of the 'b': advance s2.  */
+	PTR_ADDIU a1, a1, 32
+
+/* Equal words containing the terminator: the strings are equal.  */
+L(returnzero):
+	j ra
+	move v0, zero
+
+/* v0 and v1 hold the first pair of words that differ.  The result is the
+   difference of the first byte pair, in memory order, at which either the
+   bytes differ or the s1 byte is NUL.  */
+L(worddiff):
+#ifdef USE_CLZ
+	/* t1 = mask with bit 7 set in each byte position where v0 holds a
+	   zero byte.
+
+	   The short form (v0 - 0x01010101) & ~v0 & 0x80808080 is exact only
+	   for the lowest-order zero byte: the borrow out of a zero byte can
+	   falsely flag a 0x01 byte in the next more significant position.
+	   On little endian those false flags lie at higher addresses than
+	   the real NUL, so they never win the 'clz' below.  On big endian
+	   they lie at lower addresses and would make the routine pick an
+	   earlier, equal byte (e.g. "\001" vs "\001A" would compare equal),
+	   so there the carry-free exact form is used instead:
+	   ~(((v0 & 0x7f7f7f7f) + 0x7f7f7f7f) | v0 | 0x7f7f7f7f).  */
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	subu t0, v0, t8
+	nor t1, v0, t9
+	and t1, t0, t1
+# else
+	and t0, v0, t9
+	addu t0, t0, t9
+	or t0, t0, v0
+	nor t1, t0, t9
+# endif
+	/* t0 = bits that differ between the words, plus the NUL flags.  */
+	xor t0, v0, v1
+	or t0, t0, t1
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	/* Byte-reverse t0 so the lowest-addressed byte is most significant.  */
+	wsbh t0, t0
+	rotr t0, t0, 16
+# endif
+	/* t1 = 8 * (index, in memory order, of the first flagged byte).  */
+	clz t1, t0
+	and t1, 0xf8
+# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	/* Convert to the right-rotate count for that byte: 24 - t1.  */
+	neg t1
+	addu t1, 24
+# endif
+	/* Bring the deciding byte of each word into bits 7..0 and subtract.  */
+	rotrv v0, v0, t1
+	rotrv v1, v1, t1
+	and v0, v0, 0xff
+	and v1, v1, 0xff
+	j ra
+	subu v0, v0, v1
+#else /* USE_CLZ */
+	/* Check the four byte pairs in memory order.  For each pair: if the
+	   s1 byte is zero, or the bytes differ, return their difference.
+	   The pairs alternate between t0/t1 and t8/t9 because the delay slot
+	   of each 'bne' already starts extracting the next byte into the
+	   other register pair.  The fourth pair needs no test: it falls
+	   through to L(wexit89).  */
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	/* Little endian: the first byte in memory is bits 7..0.  */
+	andi t0, v0, 0xff
+	beq t0, zero, L(wexit01)
+	andi t1, v1, 0xff
+	bne t0, t1, L(wexit01)
+
+	srl t8, v0, 8
+	srl t9, v1, 8
+	andi t8, t8, 0xff
+	beq t8, zero, L(wexit89)
+	andi t9, t9, 0xff
+	bne t8, t9, L(wexit89)
+
+	srl t0, v0, 16
+	srl t1, v1, 16
+	andi t0, t0, 0xff
+	beq t0, zero, L(wexit01)
+	andi t1, t1, 0xff
+	bne t0, t1, L(wexit01)
+
+	srl t8, v0, 24
+	srl t9, v1, 24
+# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+	/* Big endian: the first byte in memory is bits 31..24.  */
+	srl t0, v0, 24
+	beq t0, zero, L(wexit01)
+	srl t1, v1, 24
+	bne t0, t1, L(wexit01)
+
+	srl t8, v0, 16
+	srl t9, v1, 16
+	andi t8, t8, 0xff
+	beq t8, zero, L(wexit89)
+	andi t9, t9, 0xff
+	bne t8, t9, L(wexit89)
+
+	srl t0, v0, 8
+	srl t1, v1, 8
+	andi t0, t0, 0xff
+	beq t0, zero, L(wexit01)
+	andi t1, t1, 0xff
+	bne t0, t1, L(wexit01)
+
+	andi t8, v0, 0xff
+	andi t9, v1, 0xff
+# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+
+/* Return the byte difference held in t8/t9.  */
+L(wexit89):
+	j ra
+	subu v0, t8, t9
+/* Return the byte difference held in t0/t1.  */
+L(wexit01):
+	j ra
+	subu v0, t0, t1
+#endif /* USE_CLZ */
+
+/* It might seem better to do the 'beq' instruction between the two 'lbu'
+   instructions so that the nop is not needed but testing showed that this
+   code is actually faster (based on glibc strcmp test).
+
+   BYTECMP01 and BYTECMP89 compare the byte pair at OFFSET, using v0/v1 and
+   t8/t9 respectively.  Each leaves the loop when the s1 byte is zero or
+   the bytes differ.  Each ends on a 'bne' whose delay slot is whatever
+   follows the macro invocation; in the loop below that is the first load
+   of the next comparison, which is why the two register pairs alternate.  */
+#define BYTECMP01(OFFSET) \
+	lbu v0, OFFSET(a0); \
+	lbu v1, OFFSET(a1); \
+	beq v0, zero, L(bexit01); \
+	nop; \
+	bne v0, v1, L(bexit01)
+
+#define BYTECMP89(OFFSET) \
+	lbu t8, OFFSET(a0); \
+	lbu t9, OFFSET(a1); \
+	beq t8, zero, L(bexit89); \
+	nop; \
+	bne t8, t9, L(bexit89)
+
+/* Byte loop, unrolled eight times; used when the word loop cannot be.  */
+L(byteloop):
+	BYTECMP01(0)
+	BYTECMP89(1)
+	BYTECMP01(2)
+	BYTECMP89(3)
+	BYTECMP01(4)
+	BYTECMP89(5)
+	BYTECMP01(6)
+	BYTECMP89(7)
+	/* Delay slot of the last BYTECMP89 branch: advance s1.  */
+	PTR_ADDIU a0, a0, 8
+	b L(byteloop)
+	/* Delay slot of the 'b': advance s2.  */
+	PTR_ADDIU a1, a1, 8
+
+/* Return the byte difference held in v0/v1.  */
+L(bexit01):
+	j ra
+	subu v0, v0, v1
+/* Return the byte difference held in t8/t9.  */
+L(bexit89):
+	j ra
+	subu v0, t8, t9
+
+	.set at
+	.set reorder
+
+END(STRCMP_NAME)
+#ifndef __ANDROID__
+# ifdef _LIBC
+libc_hidden_builtin_def (STRCMP_NAME)
+# endif
+#endif