summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Brent DeGraaf <bdegraaf@codeaurora.org> 2014-07-08 16:59:13 -0400
committer Steve Kondik <shade@chemlab.org> 2014-07-30 23:50:47 -0700
commit 18a8907c4efb82f839959cce3cec442a96d87f8e (patch)
tree 694275d02da66ba65b7fc6640ffb1e7fdc7fac79
parent a4fa620298965ae70a8611b02490253d4a360bd9 (diff)
download bionic-18a8907c4efb82f839959cce3cec442a96d87f8e.zip
bionic-18a8907c4efb82f839959cce3cec442a96d87f8e.tar.gz
bionic-18a8907c4efb82f839959cce3cec442a96d87f8e.tar.bz2
bionic: update memmove for 32 bits atomic
When src and dst are both 32-bit aligned, the updated memmove guarantees 32-bit atomic copies.
Change-Id: I21cb77451270d061b32e3e2d2fda22e7e373b7ff
-rw-r--r-- libc/arch-arm/krait/bionic/memmove.S 94
1 files changed, 63 insertions, 31 deletions
diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S
index b7b77ce..24fcec2 100644
--- a/libc/arch-arm/krait/bionic/memmove.S
+++ b/libc/arch-arm/krait/bionic/memmove.S
@@ -1,5 +1,5 @@
/***************************************************************************
- Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
+ Copyright (c) 2009-2014 The Linux Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -84,7 +84,7 @@ _memmove_words:
.save {r0, lr}
cmp r2, #0
it ne
- subsne r12, r0, r1
+ subsne r12, r0, r1 // Warning: do not combine these "it" blocks
it eq
bxeq lr
// memmove only if r1 < r0 < r1+r2
@@ -94,15 +94,28 @@ _memmove_words:
cmpge r12, r0
it le
ble memcpy
- cmp r2, #63
- ble .Lneon_b2f_smallcopy
+ cmp r2, #4
+ it le
+ ble .Lneon_b2f_smallcopy_loop
push {r0, lr}
add r0, r0, r2
add r1, r1, r2
+ cmp r2, #64
+ it ge
+ bge .Lneon_b2f_copy_64
+ cmp r2, #32
+ it ge
+ bge .Lneon_b2f_copy_32
+ cmp r2, #8
+ it ge
+ bge .Lneon_b2f_copy_8
+ b .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
mov r12, r2, lsr #6
add r0, r0, #32
add r1, r1, #32
cmp r12, #PLDTHRESH
+ it le
ble .Lneon_b2f_copy_64_loop_nopld
sub r12, #PLDOFFS
sub lr, r1, #(PLDOFFS)*PLDSIZE
@@ -116,6 +129,7 @@ _memmove_words:
subs r12, r12, #1
vst1.32 {q0, q1}, [r0]!
vst1.32 {q2, q3}, [r0]
+ it ne
bne .Lneon_b2f_copy_64_loop_outer
mov r12, #PLDOFFS
.Lneon_b2f_copy_64_loop_nopld:
@@ -126,12 +140,15 @@ _memmove_words:
subs r12, r12, #1
vst1.32 {q8, q9}, [r0]!
vst1.32 {q10, q11}, [r0]
+ it ne
bne .Lneon_b2f_copy_64_loop_nopld
ands r2, r2, #0x3f
+ it eq
beq .Lneon_memmove_done
sub r1, r1, #32
sub r0, r0, #32
cmp r2, #32
+ it lt
blt .Lneon_b2f_copy_8
.Lneon_b2f_copy_32:
sub r1, r1, #32
@@ -139,9 +156,11 @@ _memmove_words:
vld1.32 {q0, q1}, [r1]
vst1.32 {q0, q1}, [r0]
ands r2, r2, #0x1f
+ it eq
beq .Lneon_memmove_done
.Lneon_b2f_copy_8:
movs r12, r2, lsr #0x3
+ it eq
beq .Lneon_b2f_copy_1
.Lneon_b2f_copy_8_loop:
sub r1, r1, #8
@@ -149,39 +168,52 @@ _memmove_words:
vld1.32 {d0}, [r1]
subs r12, r12, #1
vst1.32 {d0}, [r0]
+ it ne
bne .Lneon_b2f_copy_8_loop
ands r2, r2, #0x7
beq .Lneon_memmove_done
.Lneon_b2f_copy_1:
- sub r1, r1, r2
- sub r0, r0, r2
- ands r12, r2, #1
- beq .Lneon_b2f_copy_halfword_loop
- subs r2, r2, #1
- ldrb r3, [r1, r2]
- strb r3, [r0, r2]
- beq .Lneon_memmove_done
-.Lneon_b2f_copy_halfword_loop:
- subs r2, r2, #2
- ldrh r3, [r1, r2]
- strh r3, [r0, r2]
- bne .Lneon_b2f_copy_halfword_loop
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
.Lneon_memmove_done:
pop {r0, pc}
-.Lneon_b2f_smallcopy:
- ands r12, r2, #1
- beq .Lneon_b2f_halfword_small_loop
- subs r2, r2, #1
- ldrb r3, [r1, r2]
- strb r3, [r0, r2]
- it eq
- bxeq lr
-.Lneon_b2f_halfword_small_loop:
- subs r2, r2, #2
- ldrh r3, [r1, r2]
- strh r3, [r0, r2]
- bne .Lneon_b2f_halfword_small_loop
+.Lneon_b2f_smallcopy_loop:
+ // 4 bytes or less
+ add r1, r1, r2
+ add r0, r0, r2
+ movs r12, r2, lsl #29
+ itttt mi
+ submi r1, r1, #4
+ submi r0, r0, #4
+ ldrmi r3, [r1]
+ strmi r3, [r0]
+ movs r2, r2, lsl #31
+ itttt cs
+ subcs r1, r1, #2
+ subcs r0, r0, #2
+ ldrhcs r3, [r1]
+ strhcs r3, [r0]
+ itttt mi
+ submi r1, r1, #1
+ submi r0, r0, #1
+ ldrbmi r12, [r1]
+ strbmi r12, [r0]
bx lr
- .cfi_endproc
+ .cfi_endproc
END(memmove)