diff options
author | Naresh Babu Saladi <c_nsalad@quicinc.com> | 2009-06-12 20:38:38 -0700 |
---|---|---|
committer | Naresh Babu Saladi <c_nsalad@quicinc.com> | 2009-06-15 13:39:12 -0700 |
commit | 1d02cb6addf1cdeacb903d8bdabdd918b9d2d52a (patch) | |
tree | 56bf5562b7ab1e1e2ff4fb3b1dad32df8c47c532 | |
parent | 955c88d2aa4fa0d2d7df01c2ea123fae588d297f (diff) | |
download | bionic-Q8650BSDCANLYA3160.zip bionic-Q8650BSDCANLYA3160.tar.gz bionic-Q8650BSDCANLYA3160.tar.bz2 |
memcpy: Optimize memcpy arm assembly code [Q8650BSDCANLYA3160]
Improve branch prediction and remove unnecessary preloads
in memcpy arm assembly code.
-rw-r--r-- | libc/arch-arm/bionic/memcpy.S | 20 |
1 files changed, 6 insertions, 14 deletions
```diff
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index fcb58cd..1e8f5f6 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2008 The Android Open Source Project
  * All rights reserved.
+ * Copyright (c) 2009, Code Aurora Forum. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -123,7 +124,7 @@ cached_aligned32:
         blo         less_than_32_left
 
         /*
-         * We preload a cache-line up to 64 bytes ahead. On the 926, this will
+         * We preload a cache-line up to 32 bytes ahead. On the 926, this will
          * stall only until the requested world is fetched, but the linefill
          * continues in the the background.
          * While the linefill is going, we write our previous cache-line
@@ -139,24 +140,15 @@ cached_aligned32:
          *
          */
 
-        // Align the preload register to a cache-line because the cpu does
-        // "critical word first" (the first word requested is loaded first).
-        bic         r12, r1, #0x1F
-        add         r12, r12, #64
-
 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        PLD         [r12, #32]
         subs        r2, r2, #32
 
-        // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
-        // for ARM9 preload will not be safely guarded by the preceding subs.
-        // When it is safely guarded the only possibility to have SIGSEGV here
-        // is because the caller overstates the length.
-        ldrhi       r3, [r12], #32      /* cheap ARM9 preload */
         stmia       r0!, { r4-r11 }
-        bhs         1b
+        blo         2f
+        b           1b
 
-        add         r2, r2, #32
+2:      add         r2, r2, #32
 
 
 
```