summaryrefslogtreecommitdiffstats
path: root/libc/arch-arm/bionic/memcmp.S
diff options
context:
space:
mode:
Diffstat (limited to 'libc/arch-arm/bionic/memcmp.S')
-rw-r--r--libc/arch-arm/bionic/memcmp.S285
1 files changed, 0 insertions, 285 deletions
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S
deleted file mode 100644
index f45b56b..0000000
--- a/libc/arch-arm/bionic/memcmp.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <machine/cpu-features.h>
-
- .text
-
- .global memcmp /* make the memcmp symbol visible to the linker */
- .type memcmp, %function /* mark the symbol as a function */
- .align 4 /* NOTE(review): GNU as on ARM reads this as a power of two, i.e. 16 bytes - confirm */
-
-/*
- * Optimized memcmp() for ARM9.
- * This would not be optimal on XScale or ARM11, where more prefetching
- * and use of PLD will be needed.
- * The 2 major optimizations here are
- * (1) The main loops compare 32, 16 or 8 bytes per iteration, depending on alignment
- * (2) The loads are scheduled in a way they won't stall
- */
-
-memcmp: /* in: r0 = first buffer, r1 = second buffer, r2 = byte count; out: r0 = result */
- PLD (r0, #0) /* PLD is not defined in this file; presumably a prefetch macro from cpu-features.h - confirm */
- PLD (r1, #0)
-
- /* take care of the case where length is 0 or the buffers are the same */
- cmp r0, r1
- cmpne r2, #0 /* only test the length when the pointers differ */
- moveq r0, #0 /* Z set: same pointer or zero length, so return 0 */
- bxeq lr
-
- /* save registers (r4 and lr are both used as scratch below) */
- stmfd sp!, {r4, lr}
-
- PLD (r0, #32)
- PLD (r1, #32)
-
- /* since r0 holds the result, move the first source
- * pointer somewhere else
- */
-
- mov r4, r0 /* from here on r4 walks the first buffer */
-
- /* make sure we have at least 8+4 bytes, this simplifies things below
- * and avoids some overhead for small blocks
- */
- cmp r2, #(8+4)
- bmi 8f /* short block: use the plain byte loop */
-
- /* align first pointer to word boundary
- * offset = -src & 3
- */
- rsb r3, r4, #0 /* r3 = -r4 */
- ands r3, r3, #3 /* r3 = bytes needed to reach a word boundary */
- beq 0f /* already word-aligned */
-
- /* align first pointer */
- sub r2, r2, r3 /* account for the r3 bytes compared in the loop below */
-1: ldrb r0, [r4], #1
- ldrb ip, [r1], #1
- subs r0, r0, ip /* r0 = byte difference, which is also the return value */
- bne 9f /* mismatch: return r0 */
- subs r3, r3, #1
- bne 1b
-
-
-0: /* here the first pointer is aligned, and we have at least 4 bytes
- * to process.
- */
-
- /* see if the pointers are congruent */
- eor r0, r4, r1
- ands r0, r0, #3 /* non-zero when the low two address bits differ */
- bne 5f
-
- /* congruent case, 32 bytes per iteration
- * We need to make sure there are at least 32+4 bytes left
- * because we effectively read ahead one word, and we could
- * read past the buffer (and segfault) if we're not careful.
- */
-
- ldr ip, [r1] /* preload the first word of the second buffer */
- subs r2, r2, #(32 + 4)
- bmi 1f /* fewer than 36 bytes left: skip the unrolled loop */
-
-0: PLD (r4, #64)
- PLD (r1, #64)
- ldr r0, [r4], #4
- ldr lr, [r1, #4]! /* read one word ahead in the second buffer */
- eors r0, r0, ip /* compare against the word loaded previously */
- ldreq r0, [r4], #4 /* the remaining steps execute only while every word so far matched */
- ldreq ip, [r1, #4]! /* ip and lr alternate as "current" and "read-ahead" word */
- eoreqs r0, r0, lr
- ldreq r0, [r4], #4
- ldreq lr, [r1, #4]!
- eoreqs r0, r0, ip
- ldreq r0, [r4], #4
- ldreq ip, [r1, #4]!
- eoreqs r0, r0, lr
- ldreq r0, [r4], #4
- ldreq lr, [r1, #4]!
- eoreqs r0, r0, ip
- ldreq r0, [r4], #4
- ldreq ip, [r1, #4]!
- eoreqs r0, r0, lr
- ldreq r0, [r4], #4
- ldreq lr, [r1, #4]!
- eoreqs r0, r0, ip
- ldreq r0, [r4], #4
- ldreq ip, [r1, #4]!
- eoreqs r0, r0, lr
- bne 2f /* a word differed: go locate the differing byte */
- subs r2, r2, #32
- bhs 0b /* loop while the subtraction did not borrow */
-
- /* do we have at least 4 bytes left? */
-1: adds r2, r2, #(32 - 4 + 4) /* r2 = bytes left - 4 */
- bmi 4f
-
- /* finish off 4 bytes at a time */
-3: ldr r0, [r4], #4
- ldr ip, [r1], #4
- eors r0, r0, ip
- bne 2f
- subs r2, r2, #4
- bhs 3b
-
- /* are we done? */
-4: adds r2, r2, #4 /* r2 = bytes left */
- moveq r0, #0
- beq 9f
-
- /* finish off the remaining bytes */
- b 8f
-
-2: /* the last 4 bytes are different, restart them */
- sub r4, r4, #4 /* step both pointers back to the word that differed */
- sub r1, r1, #4
- mov r2, #4 /* the byte loop below returns at the first differing byte */
-
- /* process the last few bytes */
-8: ldrb r0, [r4], #1
- ldrb ip, [r1], #1
- // stall
- subs r0, r0, ip /* r0 = first-buffer byte minus second-buffer byte */
- bne 9f
- subs r2, r2, #1
- bne 8b /* falls through with r0 == 0 when every byte matched */
-
-9: /* restore registers and return */
- ldmfd sp!, {r4, lr}
- bx lr
-
-
-
-
-
-5: /*************** non-congruent case ***************/
- and r0, r1, #3 /* r0 = byte offset of r1 within its word (r4 is word-aligned here) */
- cmp r0, #2
- bne 4f /* offsets 1 and 3 use the generic shifting loop */
-
- /* here, offset is 2 (16-bits aligned, special cased) */
-
- /* make sure we have at least 16 bytes to process */
- subs r2, r2, #16
- addmi r2, r2, #16 /* undo the subtraction before using the byte loop */
- bmi 8b
-
- /* align the unaligned pointer */
- bic r1, r1, #3
- ldr lr, [r1], #4 /* lr = aligned word holding the first 2 wanted bytes */
-
-6: PLD (r1, #64)
- PLD (r4, #64)
- mov ip, lr, lsr #16 /* NOTE(review): this lsr/lsl pairing looks little-endian specific - confirm */
- ldr lr, [r1], #4
- ldr r0, [r4], #4
- orr ip, ip, lr, lsl #16 /* ip = halves of two aligned words merged into one comparison word */
- eors r0, r0, ip
- moveq ip, lr, lsr #16 /* the remaining steps execute only while every word so far matched */
- ldreq lr, [r1], #4
- ldreq r0, [r4], #4
- orreq ip, ip, lr, lsl #16
- eoreqs r0, r0, ip
- moveq ip, lr, lsr #16
- ldreq lr, [r1], #4
- ldreq r0, [r4], #4
- orreq ip, ip, lr, lsl #16
- eoreqs r0, r0, ip
- moveq ip, lr, lsr #16
- ldreq lr, [r1], #4
- ldreq r0, [r4], #4
- orreq ip, ip, lr, lsl #16
- eoreqs r0, r0, ip
- bne 7f
- subs r2, r2, #16
- bhs 6b
- sub r1, r1, #2 /* the aligned read-ahead left r1 2 bytes past the next uncompared byte */
- /* are we done? */
- adds r2, r2, #16
- moveq r0, #0
- beq 9b
- /* finish off the remaining bytes */
- b 8b
-
-7: /* fix up the 2 pointers and fallthrough... */
- sub r1, r1, #(4+2) /* back over the differing word plus the 2-byte read-ahead */
- sub r4, r4, #4
- mov r2, #4 /* recheck just these 4 bytes in the byte loop */
- b 8b
-
-
-4: /*************** offset is 1 or 3 (less optimized) ***************/
-
- stmfd sp!, {r5, r6, r7}
-
- // r5 = right-shift amount, in bits
- // r6 = left-shift amount, in bits
- // r7 = scratch (most recently loaded aligned word)
-
- mov r5, r0, lsl #3 /* r5 = right shift = 8 * offset */
- rsb r6, r5, #32 /* r6 = left shift = 32 - r5 */
-
- /* align the unaligned pointer */
- bic r1, r1, #3
- ldr r7, [r1], #4
- sub r2, r2, #8 /* the loop below handles 8 bytes per iteration */
-
-6: mov ip, r7, lsr r5
- ldr r7, [r1], #4
- ldr r0, [r4], #4
- orr ip, ip, r7, lsl r6 /* ip = parts of two aligned words merged into one comparison word */
- eors r0, r0, ip
- moveq ip, r7, lsr r5 /* second word is compared only if the first one matched */
- ldreq r7, [r1], #4
- ldreq r0, [r4], #4
- orreq ip, ip, r7, lsl r6
- eoreqs r0, r0, ip
- bne 7f
- subs r2, r2, #8
- bhs 6b
-
- sub r1, r1, r6, lsr #3 /* step r1 back r6/8 bytes to the next uncompared byte */
- ldmfd sp!, {r5, r6, r7}
-
- /* are we done? */
- adds r2, r2, #8
- moveq r0, #0
- beq 9b
-
- /* finish off the remaining bytes */
- b 8b
-
-7: /* fix up the 2 pointers and fallthrough... */
- sub r1, r1, #4 /* back over the word that differed... */
- sub r1, r1, r6, lsr #3 /* ...and over the r6/8 bytes of read-ahead */
- sub r4, r4, #4
- mov r2, #4 /* recheck just these 4 bytes in the byte loop */
- ldmfd sp!, {r5, r6, r7}
- b 8b