From 9895f9429cb489ba271c06767531083ae4c4bcbe Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 21 Nov 2007 22:46:14 +0900
Subject: sh: clear/copy_page renames in lib and lib64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/lib64/Makefile     |  2 +-
 arch/sh/lib64/clear_page.S | 54 ++++++++++++++++++++++++++++
 arch/sh/lib64/copy_page.S  | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/sh/lib64/page_clear.S | 54 ----------------------------
 arch/sh/lib64/page_copy.S  | 89 ----------------------------------------------
 5 files changed, 144 insertions(+), 144 deletions(-)
 create mode 100644 arch/sh/lib64/clear_page.S
 create mode 100644 arch/sh/lib64/copy_page.S
 delete mode 100644 arch/sh/lib64/page_clear.S
 delete mode 100644 arch/sh/lib64/page_copy.S

(limited to 'arch/sh/lib64')

diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile
index 2f4086a..9950966 100644
--- a/arch/sh/lib64/Makefile
+++ b/arch/sh/lib64/Makefile
@@ -11,5 +11,5 @@
 
 # Panic should really be compiled as PIC
 lib-y  := udelay.o c-checksum.o dbg.o panic.o memcpy.o copy_user_memcpy.o \
-		page_copy.o page_clear.o
+		copy_page.o clear_page.o
 
diff --git a/arch/sh/lib64/clear_page.S b/arch/sh/lib64/clear_page.S
new file mode 100644
index 0000000..007ab48
--- /dev/null
+++ b/arch/sh/lib64/clear_page.S
@@ -0,0 +1,54 @@
+/*
+   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+   This file is subject to the terms and conditions of the GNU General Public
+   License.  See the file "COPYING" in the main directory of this archive
+   for more details.
+
+   Tight version of memset for the case of just clearing a page.  It turns out
+   that having the alloco's spaced out slightly due to the increment/branch
+   pair causes them to contend less for access to the cache.  Similarly,
+   keeping the stores apart from the allocos causes less contention.  => Do two
+   separate loops.  Do multiple stores per loop to amortise the
+   increment/branch cost a little.
+
+   Parameters:
+   r2 : destination effective address (start of page to clear)
+
+   Always clears 4096 bytes.
+
+   Note : alloco guarded by synco to avoid TAKum03020 erratum
+
+*/
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 8
+	.global clear_page
+clear_page:
+	pta/l 1f, tr1	! tr1 = top of the alloco loop
+	pta/l 2f, tr2	! tr2 = top of the store loop
+	ptabs/l r18, tr0	! tr0 = return target taken from r18
+
+	movi 4096, r7
+	add  r2, r7, r7	! r7 = r2 + 4096, one past the end of the page
+	add  r2, r63, r6	! r6 = r2 (r63 supplies zero): cursor for loop 1
+1:
+	alloco r6, 0
+	synco	! TAKum03020
+	addi	r6, 32, r6	! step 32 bytes per alloco
+	bgt/l	r7, r6, tr1	! repeat while r7 > r6
+
+	add  r2, r63, r6	! rewind the cursor to the start of the page
+2:
+	st.q  r6,   0, r63	! four st.q of r63 cover bytes 0 to 31 at r6
+	st.q  r6,   8, r63
+	st.q  r6,  16, r63
+	st.q  r6,  24, r63
+	addi r6, 32, r6	! advance the cursor by the 32 bytes just stored
+	bgt/l r7, r6, tr2	! repeat while r7 > r6
+
+	blink tr0, r63	! return via tr0
+
+
diff --git a/arch/sh/lib64/copy_page.S b/arch/sh/lib64/copy_page.S
new file mode 100644
index 0000000..0ec6fca
--- /dev/null
+++ b/arch/sh/lib64/copy_page.S
@@ -0,0 +1,89 @@
+/*
+   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+   This file is subject to the terms and conditions of the GNU General Public
+   License.  See the file "COPYING" in the main directory of this archive
+   for more details.
+
+   Tight version of memcpy for the case of just copying a page.
+   Prefetch strategy empirically optimised against RTL simulations
+   of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+   Parameters:
+   r2 : destination effective address (start of page)
+   r3 : source effective address (start of page)
+
+   Always copies 4096 bytes.
+
+   Points to review.
+   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+     It seems like the prefetch needs to be at least 4 lines ahead to get
+     the data into the cache in time, and the allocos contend with outstanding
+     prefetches for the same cache set, so it's better to have the numbers
+     different.
+   */
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 8
+	.global copy_page
+copy_page:
+
+	/* Copy 4096 bytes worth of data from r3 to r2.
+	   Do prefetches 4 lines ahead.
+	   Do alloco 2 lines ahead */
+
+	pta 1f, tr1	! tr1 = top of the copy loop
+	pta 2f, tr2	! tr2 is only branched to by the disabled prefetch code
+	pta 3f, tr3	! tr3 = the loads and stores, past the alloco
+	ptabs r18, tr0	! tr0 = return target taken from r18
+
+#if 0
+	/* TAKum03020 */
+	ld.q r3, 0x00, r63
+	ld.q r3, 0x20, r63
+	ld.q r3, 0x40, r63
+	ld.q r3, 0x60, r63
+#endif
+	alloco r2, 0x00	! allocate the first two destination lines up front,
+	synco		! TAKum03020
+	alloco r2, 0x20	! since the loop only allocates 2 lines ahead
+	synco		! TAKum03020
+
+	movi 3968, r6
+	add  r2, r6, r6	! r6 = dest + 3968 (4 lines before the end)
+	addi r6, 64, r7	! r7 = dest + 4032 (2 lines before the end)
+	addi r7, 64, r8	! r8 = dest + 4096 (end of the destination page)
+	sub r3, r2, r60	! r60 = source - dest, so r2 + r60 addresses the source
+	addi r60, 8, r61	! r61, r62, r23 = the same offset plus 8, 16, 24
+	addi r61, 8, r62
+	addi r62, 8, r23
+	addi r60, 0x80, r22	! offset 4 lines ahead; only read by the disabled prefetch
+
+/* Minimal code size.  The extra branches inside the loop don't cost much
+   because they overlap with the time spent waiting for prefetches to
+   complete. */
+1:
+#if 0
+	/* TAKum03020 */
+	bge/u r2, r6, tr2  ! skip prefetch for last 4 lines
+	ldx.q r2, r22, r63 ! prefetch 4 lines hence
+#endif
+2:
+	bge/u r2, r7, tr3  ! skip alloco for last 2 lines
+	alloco r2, 0x40    ! alloc destination line 2 lines ahead
+	synco		! TAKum03020
+3:
+	ldx.q r2, r60, r36	! load 32 source bytes, addressed as r2 + offset
+	ldx.q r2, r61, r37
+	ldx.q r2, r62, r38
+	ldx.q r2, r23, r39
+	st.q  r2,   0, r36	! store them to the destination line at r2
+	st.q  r2,   8, r37
+	st.q  r2,  16, r38
+	st.q  r2,  24, r39
+	addi r2, 32, r2	! advance r2 by one 32-byte line (r2 is modified)
+	bgt/l r8, r2, tr1	! loop while r8 > r2
+
+	blink tr0, r63	   ! return
diff --git a/arch/sh/lib64/page_clear.S b/arch/sh/lib64/page_clear.S
deleted file mode 100644
index 007ab48..0000000
--- a/arch/sh/lib64/page_clear.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
-   This file is subject to the terms and conditions of the GNU General Public
-   License.  See the file "COPYING" in the main directory of this archive
-   for more details.
-
-   Tight version of memset for the case of just clearing a page.  It turns out
-   that having the alloco's spaced out slightly due to the increment/branch
-   pair causes them to contend less for access to the cache.  Similarly,
-   keeping the stores apart from the allocos causes less contention.  => Do two
-   separate loops.  Do multiple stores per loop to amortise the
-   increment/branch cost a little.
-
-   Parameters:
-   r2 : source effective address (start of page)
-
-   Always clears 4096 bytes.
-
-   Note : alloco guarded by synco to avoid TAKum03020 erratum
-
-*/
-
-	.section .text..SHmedia32,"ax"
-	.little
-
-	.balign 8
-	.global clear_page
-clear_page:
-	pta/l 1f, tr1
-	pta/l 2f, tr2
-	ptabs/l r18, tr0
-
-	movi 4096, r7
-	add  r2, r7, r7
-	add  r2, r63, r6
-1:
-	alloco r6, 0
-	synco	! TAKum03020
-	addi	r6, 32, r6
-	bgt/l	r7, r6, tr1
-
-	add  r2, r63, r6
-2:
-	st.q  r6,   0, r63
-	st.q  r6,   8, r63
-	st.q  r6,  16, r63
-	st.q  r6,  24, r63
-	addi r6, 32, r6
-	bgt/l r7, r6, tr2
-
-	blink tr0, r63
-
-
diff --git a/arch/sh/lib64/page_copy.S b/arch/sh/lib64/page_copy.S
deleted file mode 100644
index 0ec6fca..0000000
--- a/arch/sh/lib64/page_copy.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
-   This file is subject to the terms and conditions of the GNU General Public
-   License.  See the file "COPYING" in the main directory of this archive
-   for more details.
-
-   Tight version of mempy for the case of just copying a page.
-   Prefetch strategy empirically optimised against RTL simulations
-   of SH5-101 cut2 eval chip with Cayman board DDR memory.
-
-   Parameters:
-   r2 : destination effective address (start of page)
-   r3 : source effective address (start of page)
-
-   Always copies 4096 bytes.
-
-   Points to review.
-   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
-     It seems like the prefetch needs to be at at least 4 lines ahead to get
-     the data into the cache in time, and the allocos contend with outstanding
-     prefetches for the same cache set, so it's better to have the numbers
-     different.
-   */
-
-	.section .text..SHmedia32,"ax"
-	.little
-
-	.balign 8
-	.global copy_page
-copy_page:
-
-	/* Copy 4096 bytes worth of data from r3 to r2.
-	   Do prefetches 4 lines ahead.
-	   Do alloco 2 lines ahead */
-
-	pta 1f, tr1
-	pta 2f, tr2
-	pta 3f, tr3
-	ptabs r18, tr0
-
-#if 0
-	/* TAKum03020 */
-	ld.q r3, 0x00, r63
-	ld.q r3, 0x20, r63
-	ld.q r3, 0x40, r63
-	ld.q r3, 0x60, r63
-#endif
-	alloco r2, 0x00
-	synco		! TAKum03020
-	alloco r2, 0x20
-	synco		! TAKum03020
-
-	movi 3968, r6
-	add  r2, r6, r6
-	addi r6, 64, r7
-	addi r7, 64, r8
-	sub r3, r2, r60
-	addi r60, 8, r61
-	addi r61, 8, r62
-	addi r62, 8, r23
-	addi r60, 0x80, r22
-
-/* Minimal code size.  The extra branches inside the loop don't cost much
-   because they overlap with the time spent waiting for prefetches to
-   complete. */
-1:
-#if 0
-	/* TAKum03020 */
-	bge/u r2, r6, tr2  ! skip prefetch for last 4 lines
-	ldx.q r2, r22, r63 ! prefetch 4 lines hence
-#endif
-2:
-	bge/u r2, r7, tr3  ! skip alloco for last 2 lines
-	alloco r2, 0x40    ! alloc destination line 2 lines ahead
-	synco		! TAKum03020
-3:
-	ldx.q r2, r60, r36
-	ldx.q r2, r61, r37
-	ldx.q r2, r62, r38
-	ldx.q r2, r23, r39
-	st.q  r2,   0, r36
-	st.q  r2,   8, r37
-	st.q  r2,  16, r38
-	st.q  r2,  24, r39
-	addi r2, 32, r2
-	bgt/l r8, r2, tr1
-
-	blink tr0, r63	   ! return
-- 
cgit v1.1