diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:28:13 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:28:13 -0800 |
commit | 1767f908af327fa388b1c66883760ad851267013 (patch) | |
tree | 4b825dc642cb6eb9a060e54bf8d69288fbee4904 /libc/arch-arm/bionic/memcmp.S | |
parent | a799b53f10e5a6fd51fef4436cfb7ec99836a516 (diff) | |
download | bionic-1767f908af327fa388b1c66883760ad851267013.zip bionic-1767f908af327fa388b1c66883760ad851267013.tar.gz bionic-1767f908af327fa388b1c66883760ad851267013.tar.bz2 |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libc/arch-arm/bionic/memcmp.S')
-rw-r--r-- | libc/arch-arm/bionic/memcmp.S | 285 |
1 files changed, 0 insertions, 285 deletions
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S deleted file mode 100644 index f45b56b..0000000 --- a/libc/arch-arm/bionic/memcmp.S +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <machine/cpu-features.h> - - .text - - .global memcmp - .type memcmp, %function - .align 4 - -/* - * Optimized memcmp() for ARM9 (r0, r1 = buffers to compare, r2 = length in bytes; the result is returned in r0: 0 if equal, otherwise the difference between the first pair of mismatching bytes). - * This would not be optimal on XScale or ARM11, where more prefetching - * and use of PLD will be needed. 
- * The 2 major optimizations here are - * (1) The main loops compare 32, 16 or 8 bytes per iteration (congruent, offset 2, and offset 1 or 3 cases respectively) - * (2) The loads are scheduled in a way they won't stall - */ - -memcmp: - PLD (r0, #0) - PLD (r1, #0) - - /* take care of the case where length is 0 or the buffers are the same */ - cmp r0, r1 - cmpne r2, #0 - moveq r0, #0 - bxeq lr - - /* save registers (lr is used as a scratch register in the loops below) */ - stmfd sp!, {r4, lr} - - PLD (r0, #32) - PLD (r1, #32) - - /* since r0 holds the result, move the first source - * pointer somewhere else - */ - - mov r4, r0 - - /* make sure we have at least 8+4 bytes, this simplifies things below - * and avoids some overhead for small blocks (they go straight to the byte loop at 8:) - */ - cmp r2, #(8+4) - bmi 8f - - /* align first pointer to word boundary - * offset = -src & 3 - */ - rsb r3, r4, #0 - ands r3, r3, #3 - beq 0f - - /* align first pointer: compare r3 bytes one at a time */ - sub r2, r2, r3 -1: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - subs r0, r0, ip - bne 9f - subs r3, r3, #1 - bne 1b - - -0: /* here the first pointer is aligned, and we have at least 4 bytes - * to process. - */ - - /* see if the pointers are congruent (same offset within a word) */ - eor r0, r4, r1 - ands r0, r0, #3 - bne 5f - - /* congruent case, 32 bytes per iteration - * We need to make sure there are at least 32+4 bytes left - * because we effectively read ahead one word, and we could - * read past the buffer (and segfault) if we're not careful; ip and lr alternately hold the word read ahead from r1. - */ - - ldr ip, [r1] - subs r2, r2, #(32 + 4) - bmi 1f - -0: PLD (r4, #64) - PLD (r1, #64) - ldr r0, [r4], #4 - ldr lr, [r1, #4]! - eors r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - bne 2f - subs r2, r2, #32 - bhs 0b - - /* do we have at least 4 bytes left? 
*/ -1: adds r2, r2, #(32 - 4 + 4) - bmi 4f - - /* finish off 4 bytes at a time */ -3: ldr r0, [r4], #4 - ldr ip, [r1], #4 - eors r0, r0, ip - bne 2f - subs r2, r2, #4 - bhs 3b - - /* are we done? */ -4: adds r2, r2, #4 - moveq r0, #0 - beq 9f - - /* finish off the remaining bytes */ - b 8f - -2: /* the last 4 bytes are different, restart them byte by byte so r0 gets the byte difference */ - sub r4, r4, #4 - sub r1, r1, #4 - mov r2, #4 - - /* process the last few bytes (do-while loop: r2 must be at least 1 on entry) */ -8: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - // stall - subs r0, r0, ip - bne 9f - subs r2, r2, #1 - bne 8b - -9: /* restore registers and return */ - ldmfd sp!, {r4, lr} - bx lr - - - - - -5: /*************** non-congruent case ***************/ - and r0, r1, #3 - cmp r0, #2 - bne 4f - - /* here, offset is 2 (16-bits aligned, special cased) */ - - /* make sure we have at least 16 bytes to process, else use the byte loop */ - subs r2, r2, #16 - addmi r2, r2, #16 - bmi 8b - - /* align the unaligned pointer and preload its first word into lr */ - bic r1, r1, #3 - ldr lr, [r1], #4 - -6: PLD (r1, #64) - PLD (r4, #64) - mov ip, lr, lsr #16 - ldr lr, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, lr, lsl #16 - eors r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #16 - bhs 6b - sub r1, r1, #2 - /* are we done? */ - adds r2, r2, #16 - moveq r0, #0 - beq 9b - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers and fallthrough... 
*/ - sub r1, r1, #(4+2) - sub r4, r4, #4 - mov r2, #4 - b 8b - - -4: /*************** offset is 1 or 3 (less optimized) ***************/ - - stmfd sp!, {r5, r6, r7} - - // r5 = rhs - // r6 = lhs - // r7 = scratch - - mov r5, r0, lsl #3 /* r5 = right shift in bits (8 * offset) */ - rsb r6, r5, #32 /* r6 = left shift in bits (32 - r5) */ - - /* align the unaligned pointer and preload its first word into r7 */ - bic r1, r1, #3 - ldr r7, [r1], #4 - sub r2, r2, #8 - -6: mov ip, r7, lsr r5 - ldr r7, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, r7, lsl r6 - eors r0, r0, ip - moveq ip, r7, lsr r5 - ldreq r7, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, r7, lsl r6 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #8 - bhs 6b - - sub r1, r1, r6, lsr #3 - ldmfd sp!, {r5, r6, r7} - - /* are we done? */ - adds r2, r2, #8 - moveq r0, #0 - beq 9b - - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers, restore r5-r7 and branch to the byte loop */ - sub r1, r1, #4 - sub r1, r1, r6, lsr #3 - sub r4, r4, #4 - mov r2, #4 - ldmfd sp!, {r5, r6, r7} - b 8b |