summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Naresh Babu Saladi <c_nsalad@quicinc.com> 2009-06-12 20:38:38 -0700
committer Naresh Babu Saladi <c_nsalad@quicinc.com> 2009-06-15 13:39:12 -0700
commit 1d02cb6addf1cdeacb903d8bdabdd918b9d2d52a (patch)
tree 56bf5562b7ab1e1e2ff4fb3b1dad32df8c47c532
parent 955c88d2aa4fa0d2d7df01c2ea123fae588d297f (diff)
download bionic-Q8650BSDCANLYA3160.zip
bionic-Q8650BSDCANLYA3160.tar.gz
bionic-Q8650BSDCANLYA3160.tar.bz2
memcpy: Optimize memcpy arm assembly code Q8650BSDCANLYA3160
Improve branch prediction and remove unnecessary preloads in memcpy arm assembly code.
-rw-r--r-- libc/arch-arm/bionic/memcpy.S 20
1 files changed, 6 insertions, 14 deletions
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index fcb58cd..1e8f5f6 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -1,6 +1,7 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
+ * Copyright (c) 2009, Code Aurora Forum. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -123,7 +124,7 @@ cached_aligned32:
blo less_than_32_left
/*
- * We preload a cache-line up to 64 bytes ahead. On the 926, this will
+ * We preload a cache-line up to 32 bytes ahead. On the 926, this will
* stall only until the requested word is fetched, but the linefill
* continues in the background.
* While the linefill is going, we write our previous cache-line
@@ -139,24 +140,15 @@ cached_aligned32:
*
*/
- // Align the preload register to a cache-line because the cpu does
- // "critical word first" (the first word requested is loaded first).
- bic r12, r1, #0x1F
- add r12, r12, #64
-
1: ldmia r1!, { r4-r11 }
- PLD (r12, #64)
+ PLD [r12, #32]
subs r2, r2, #32
- // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
- // for ARM9 preload will not be safely guarded by the preceding subs.
- // When it is safely guarded the only possibility to have SIGSEGV here
- // is because the caller overstates the length.
- ldrhi r3, [r12], #32 /* cheap ARM9 preload */
stmia r0!, { r4-r11 }
- bhs 1b
+ blo 2f
+ b 1b
- add r2, r2, #32
+2: add r2, r2, #32