Remove NEON optimizations for memcpy

author: David 'Digit' Turner <digit@google.com> 2009-09-27 07:08:46 -0700
committer: David 'Digit' Turner <digit@google.com> 2009-09-27 07:08:46 -0700
commit: f355096a64b74c8e869527de55f7e908873e3128 (patch)
tree: 914ca013c2b4979b97c639c301a70c41a4785a4a
parent: bc10cd2900cdb7fed077163b6a33e0f8572b2b19 (diff)
download: bionic-f355096a64b74c8e869527de55f7e908873e3128.zip
          bionic-f355096a64b74c8e869527de55f7e908873e3128.tar.gz
          bionic-f355096a64b74c8e869527de55f7e908873e3128.tar.bz2
1 file changed, 0 insertions, 107 deletions
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 4ea2c6d..fcb58cd 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -28,111 +28,6 @@
 
 #include <machine/cpu-features.h>
 
-#if __ARM_ARCH__ == 7 || defined(__ARM_NEON__)
-
-		.text
-		.fpu    neon
-
-		.global memcpy
-		.type memcpy, %function
-		.align 4
-
-#define NEON_MAX_PREFETCH_DISTANCE 320
-
-memcpy:
-        .fnstart
-		mov	ip, r0
-		cmp	r2, #16
-		blt     4f	@ Have less than 16 bytes to copy
-
-		@ First ensure 16 byte alignment for the destination buffer
-		tst	r0, #0xF
-		beq	2f
-		tst	r0, #1
-		ldrneb	r3, [r1], #1
-		strneb	r3, [ip], #1
-		subne	r2, r2, #1
-		tst	ip, #2
-		ldrneb	r3, [r1], #1
-		strneb	r3, [ip], #1
-		ldrneb	r3, [r1], #1
-		strneb	r3, [ip], #1
-		subne	r2, r2, #2
-
-		tst	ip, #4
-		beq	1f
-		vld4.8	{d0[0], d1[0], d2[0], d3[0]}, [r1]!
-		vst4.8	{d0[0], d1[0], d2[0], d3[0]}, [ip, :32]!
-		sub	r2, r2, #4
-1:
-		tst	ip, #8
-		beq	2f
-		vld1.8	{d0}, [r1]!
-		vst1.8	{d0}, [ip, :64]!
-		sub	r2, r2, #8
-2:
-		subs	r2, r2, #32
-		blt	3f
-		mov	r3, #32
-
-		@ Main copy loop, 32 bytes are processed per iteration.
-		@ ARM instructions are used for doing fine-grained prefetch,
-		@ increasing prefetch distance progressively up to
-		@ NEON_MAX_PREFETCH_DISTANCE at runtime
-1:
-		vld1.8	{d0-d3}, [r1]!
-		cmp	r3, #(NEON_MAX_PREFETCH_DISTANCE - 32)
-		pld	[r1, r3]
-		addle	r3, r3, #32
-		vst1.8	{d0-d3}, [ip, :128]!
-		sub	r2, r2, #32
-		cmp	r2, r3
-		bge	1b
-		cmp	r2, #0
-		blt	3f
-1:		@ Copy the remaining part of the buffer (already prefetched)
-		vld1.8	{d0-d3}, [r1]!
-		subs	r2, r2, #32
-		vst1.8	{d0-d3}, [ip, :128]!
-		bge	1b
-3:		@ Copy up to 31 remaining bytes
-		tst	r2, #16
-		beq	4f
-		vld1.8	{d0, d1}, [r1]!
-		vst1.8	{d0, d1}, [ip, :128]!
-4:
-		@ Use ARM instructions exclusively for the final trailing part
-		@ not fully fitting into full 16 byte aligned block in order
-		@ to avoid "ARM store after NEON store" hazard. Also NEON
-		@ pipeline will be (mostly) flushed by the time when the
-		@ control returns to the caller, making the use of NEON mostly
-		@ transparent (and avoiding hazards in the caller code)
-
-		movs	r3, r2, lsl #29
-		bcc	1f
-	.rept	8
-		ldrcsb	r3, [r1], #1
-		strcsb	r3, [ip], #1
-	.endr
-1:
-		bpl	1f
-	.rept	4
-		ldrmib	r3, [r1], #1
-		strmib	r3, [ip], #1
-	.endr
-1:
-		movs	r2, r2, lsl #31
-		ldrcsb	r3, [r1], #1
-		strcsb	r3, [ip], #1
-		ldrcsb	r3, [r1], #1
-		strcsb	r3, [ip], #1
-		ldrmib	r3, [r1], #1
-		strmib	r3, [ip], #1
-		bx	lr
-        .fnend
-
-#else	/* __ARM_ARCH__ < 7 */
-
 	.text
 
     .global memcpy
@@ -490,5 +385,3 @@ copy_last_3_and_return:
 		bx			lr
         .fnend
 
-#endif
-
author	David 'Digit' Turner <digit@google.com>	2009-09-27 07:08:46 -0700
committer	David 'Digit' Turner <digit@google.com>	2009-09-27 07:08:46 -0700
commit	f355096a64b74c8e869527de55f7e908873e3128 (patch)
tree	914ca013c2b4979b97c639c301a70c41a4785a4a
parent	bc10cd2900cdb7fed077163b6a33e0f8572b2b19 (diff)
download	bionic-f355096a64b74c8e869527de55f7e908873e3128.zip bionic-f355096a64b74c8e869527de55f7e908873e3128.tar.gz bionic-f355096a64b74c8e869527de55f7e908873e3128.tar.bz2