summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libc/arch-arm/arm.mk1
-rw-r--r--libc/arch-arm/cortex-a15/cortex-a15.mk1
-rw-r--r--libc/arch-arm/cortex-a9/cortex-a9.mk1
-rw-r--r--libc/arch-arm/denver/bionic/memmove.S281
-rw-r--r--libc/arch-arm/denver/denver.mk5
-rw-r--r--libc/arch-arm/generic/generic.mk1
-rw-r--r--libc/arch-arm/krait/krait.mk3
-rw-r--r--tests/string_test.cpp66
8 files changed, 355 insertions, 4 deletions
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index 06b1675..3821854 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -13,7 +13,6 @@ libc_bionic_src_files_arm := \
libc_common_src_files_arm += \
bionic/index.cpp \
bionic/memchr.c \
- bionic/memmove.c.arm \
bionic/memrchr.c \
bionic/strchr.cpp \
bionic/strnlen.c \
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index d0896af..552811e 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -7,3 +7,4 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strlen.S \
arch-arm/cortex-a15/bionic/__strcat_chk.S \
arch-arm/cortex-a15/bionic/__strcpy_chk.S \
+ bionic/memmove.c \
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index e15602b..9b99387 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -7,3 +7,4 @@ libc_bionic_src_files_arm += \
arch-arm/cortex-a9/bionic/strlen.S \
arch-arm/cortex-a9/bionic/__strcat_chk.S \
arch-arm/cortex-a9/bionic/__strcpy_chk.S \
+ bionic/memmove.c \
diff --git a/libc/arch-arm/denver/bionic/memmove.S b/libc/arch-arm/denver/bionic/memmove.S
new file mode 100644
index 0000000..132190b
--- /dev/null
+++ b/libc/arch-arm/denver/bionic/memmove.S
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ * Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ .text
+ .syntax unified
+ .fpu neon
+
+#define CACHE_LINE_SIZE (64)
+#define MEMCPY_BLOCK_SIZE_SMALL (32768)
+#define MEMCPY_BLOCK_SIZE_MID (1048576)
+#define PREFETCH_DISTANCE_NEAR (CACHE_LINE_SIZE*4)
+#define PREFETCH_DISTANCE_MID (CACHE_LINE_SIZE*4)
+#define PREFETCH_DISTANCE_FAR (CACHE_LINE_SIZE*16)
+
+ENTRY(memmove)
+ cmp r2, #0
+ cmpne r0, r1
+ bxeq lr
+ subs r3, r0, r1
+ bls .L_jump_to_memcpy
+ cmp r2, r3
+ bhi .L_reversed_memcpy
+
+.L_jump_to_memcpy:
+ b memcpy
+
+.L_reversed_memcpy:
+ push {r0, lr}
+ .cfi_def_cfa_offset 8
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset lr, 4
+
+ add r0, r0, r2
+ add r1, r1, r2
+
+ /* preload next cache line */
+ pld [r1, #-CACHE_LINE_SIZE]
+ pld [r1, #-CACHE_LINE_SIZE*2]
+
+.L_reversed_memcpy_align_dest:
+ /* Deal with very small blocks (< 32bytes) asap */
+ cmp r2, #32
+ blo .L_reversed_memcpy_lt_32bytes
+ /* no need to align if len < 128 bytes */
+ cmp r2, #128
+ blo .L_reversed_memcpy_lt_128bytes
+ /* align destination to 64 bytes (1 cache line) */
+ ands r3, r0, #0x3f
+ beq .L_reversed_memcpy_dispatch
+ sub r2, r2, r3
+0: /* copy 1 byte */
+ movs ip, r3, lsl #31
+ ldrbmi ip, [r1, #-1]!
+ strbmi ip, [r0, #-1]!
+1: /* copy 2 bytes */
+ ldrbcs ip, [r1, #-1]!
+ strbcs ip, [r0, #-1]!
+ ldrbcs ip, [r1, #-1]!
+ strbcs ip, [r0, #-1]!
+2: /* copy 4 bytes */
+ movs ip, r3, lsl #29
+ bpl 3f
+ sub r1, r1, #4
+ sub r0, r0, #4
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]
+3: /* copy 8 bytes */
+ bcc 4f
+ sub r1, r1, #8
+ sub r0, r0, #8
+ vld1.8 {d0}, [r1]
+ vst1.8 {d0}, [r0, :64]
+4: /* copy 16 bytes */
+ movs ip, r3, lsl #27
+ bpl 5f
+ sub r1, r1, #16
+ sub r0, r0, #16
+ vld1.8 {q0}, [r1]
+ vst1.8 {q0}, [r0, :128]
+5: /* copy 32 bytes */
+ bcc .L_reversed_memcpy_dispatch
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.8 {q0, q1}, [r1]
+ vst1.8 {q0, q1}, [r0, :256]
+
+.L_reversed_memcpy_dispatch:
+ /* preload more cache lines */
+ pld [r1, #-CACHE_LINE_SIZE*3]
+ pld [r1, #-CACHE_LINE_SIZE*4]
+
+ cmp r2, #MEMCPY_BLOCK_SIZE_SMALL
+ blo .L_reversed_memcpy_neon_pld_near
+ cmp r2, #MEMCPY_BLOCK_SIZE_MID
+ blo .L_reversed_memcpy_neon_pld_mid
+ b .L_reversed_memcpy_neon_pld_far
+
+.L_reversed_memcpy_neon_pld_near:
+ /* less than 128 bytes? */
+ subs r2, r2, #128
+ blo 1f
+ sub r1, r1, #32
+ sub r0, r0, #32
+ mov r3, #-32
+ .align 4
+0:
+ /* copy 128 bytes in each loop */
+ subs r2, r2, #128
+
+ /* preload to cache */
+ pld [r1, #-(PREFETCH_DISTANCE_NEAR+CACHE_LINE_SIZE*2)+32]
+ /* copy a cache line */
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+
+ /* preload to cache */
+ pld [r1, #-(PREFETCH_DISTANCE_NEAR+CACHE_LINE_SIZE*2)+32]
+ /* copy a cache line */
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+
+ bhs 0b
+ add r1, r1, #32
+ add r0, r0, #32
+1:
+ adds r2, r2, #128
+ bne .L_reversed_memcpy_lt_128bytes
+ pop {r0, pc}
+
+.L_reversed_memcpy_neon_pld_mid:
+ subs r2, r2, #128
+ sub r1, r1, #32
+ sub r0, r0, #32
+ mov r3, #-32
+ .align 4
+0:
+ /* copy 128 bytes in each loop */
+ subs r2, r2, #128
+
+ /* preload to cache */
+ pld [r1, #-(PREFETCH_DISTANCE_MID+CACHE_LINE_SIZE)+32]
+ /* copy a cache line */
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+
+ /* preload to cache */
+ pld [r1, #-(PREFETCH_DISTANCE_MID+CACHE_LINE_SIZE)+32]
+ /* copy a cache line */
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+ vld1.8 {q0, q1}, [r1], r3
+ vst1.8 {q0, q1}, [r0, :256], r3
+
+ bhs 0b
+ add r1, r1, #32
+ add r0, r0, #32
+1:
+ adds r2, r2, #128
+ bne .L_reversed_memcpy_lt_128bytes
+ pop {r0, pc}
+
+.L_reversed_memcpy_neon_pld_far:
+ sub r2, r2, #128
+ sub r0, r0, #128
+ sub r1, r1, #128
+ .align 4
+0:
+ /* copy 128 bytes in each loop */
+ subs r2, r2, #128
+
+ /* preload to cache */
+ pld [r1, #-(PREFETCH_DISTANCE_FAR+CACHE_LINE_SIZE*2)+128]
+ pld [r1, #-(PREFETCH_DISTANCE_FAR+CACHE_LINE_SIZE)+128]
+ /* read */
+ vld1.8 {q0, q1}, [r1]!
+ vld1.8 {q2, q3}, [r1]!
+ vld1.8 {q8, q9}, [r1]!
+ vld1.8 {q10, q11}, [r1]!
+ /* write */
+ vst1.8 {q0, q1}, [r0, :256]!
+ vst1.8 {q2, q3}, [r0, :256]!
+ vst1.8 {q8, q9}, [r0, :256]!
+ vst1.8 {q10, q11}, [r0, :256]!
+
+ sub r0, r0, #256
+ sub r1, r1, #256
+ bhs 0b
+ add r0, r0, #128
+ add r1, r1, #128
+1:
+ adds r2, r2, #128
+ bne .L_reversed_memcpy_lt_128bytes
+ pop {r0, pc}
+
+.L_reversed_memcpy_lt_128bytes:
+6: /* copy 64 bytes */
+ movs ip, r2, lsl #26
+ bcc 5f
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.8 {q0, q1}, [r1]
+ vst1.8 {q0, q1}, [r0]
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.8 {q0, q1}, [r1]
+ vst1.8 {q0, q1}, [r0]
+5: /* copy 32 bytes */
+ bpl 4f
+ sub r1, r1, #32
+ sub r0, r0, #32
+ vld1.8 {q0, q1}, [r1]
+ vst1.8 {q0, q1}, [r0]
+.L_reversed_memcpy_lt_32bytes:
+4: /* copy 16 bytes */
+ movs ip, r2, lsl #28
+ bcc 3f
+ sub r1, r1, #16
+ sub r0, r0, #16
+ vld1.8 {q0}, [r1]
+ vst1.8 {q0}, [r0]
+3: /* copy 8 bytes */
+ bpl 2f
+ sub r1, r1, #8
+ sub r0, r0, #8
+ vld1.8 {d0}, [r1]
+ vst1.8 {d0}, [r0]
+2: /* copy 4 bytes */
+ ands ip, r2, #0x4
+ beq 1f
+ sub r1, r1, #4
+ sub r0, r0, #4
+ vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]
+ vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+1: /* copy 2 bytes */
+ movs ip, r2, lsl #31
+ ldrbcs ip, [r1, #-1]!
+ strbcs ip, [r0, #-1]!
+ ldrbcs ip, [r1, #-1]!
+ strbcs ip, [r0, #-1]!
+0: /* copy 1 byte */
+ ldrbmi ip, [r1, #-1]!
+ strbmi ip, [r0, #-1]!
+
+ pop {r0, pc}
+
+END(memmove)
diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk
index 3fcc457..6989187 100644
--- a/libc/arch-arm/denver/denver.mk
+++ b/libc/arch-arm/denver/denver.mk
@@ -1,12 +1,13 @@
libc_bionic_src_files_arm += \
arch-arm/denver/bionic/memcpy.S \
+ arch-arm/denver/bionic/memmove.S \
arch-arm/denver/bionic/memset.S \
arch-arm/denver/bionic/__strcat_chk.S \
- arch-arm/denver/bionic/__strcpy_chk.S
+ arch-arm/denver/bionic/__strcpy_chk.S \
# Use cortex-a15 versions of strcat/strcpy/strlen.
libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strcat.S \
arch-arm/cortex-a15/bionic/strcpy.S \
arch-arm/cortex-a15/bionic/strlen.S \
- arch-arm/cortex-a15/bionic/strcmp.S
+ arch-arm/cortex-a15/bionic/strcmp.S \
diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk
index 2bc84e0..2456e6e 100644
--- a/libc/arch-arm/generic/generic.mk
+++ b/libc/arch-arm/generic/generic.mk
@@ -4,6 +4,7 @@ libc_bionic_src_files_arm += \
arch-arm/generic/bionic/strcmp.S \
arch-arm/generic/bionic/strcpy.S \
arch-arm/generic/bionic/strlen.c \
+ bionic/memmove.c \
bionic/__strcat_chk.cpp \
bionic/__strcpy_chk.cpp \
upstream-openbsd/lib/libc/string/strcat.c \
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 08342d6..631ab68 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -5,8 +5,9 @@ libc_bionic_src_files_arm += \
arch-arm/krait/bionic/__strcat_chk.S \
arch-arm/krait/bionic/__strcpy_chk.S \
-# Use cortex-a15 versions of strcat/strcpy/strlen.
+# Use cortex-a15 versions of strcat/strcpy/strlen and standard memmove
libc_bionic_src_files_arm += \
arch-arm/cortex-a15/bionic/strcat.S \
arch-arm/cortex-a15/bionic/strcpy.S \
arch-arm/cortex-a15/bionic/strlen.S \
+ bionic/memmove.c \
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 5ccc63d..f17e575 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -909,6 +909,56 @@ TEST(string, memmove) {
}
}
+static void verify_memmove(char* src_copy, char* dst, char* src, size_t size) {
+ memset(dst, 0, size);
+ memcpy(src, src_copy, size);
+ ASSERT_EQ(dst, memmove(dst, src, size));
+ ASSERT_EQ(0, memcmp(dst, src_copy, size));
+}
+
+#define MEMMOVE_DATA_SIZE (1024*1024*3)
+
+TEST(string, memmove_check) {
+ char* buffer = reinterpret_cast<char*>(malloc(MEMMOVE_DATA_SIZE));
+ ASSERT_TRUE(buffer != NULL);
+
+ char* src_data = reinterpret_cast<char*>(malloc(MEMMOVE_DATA_SIZE));
+ ASSERT_TRUE(src_data != NULL);
+ // Initialize to a known pattern to copy into src for each test and
+ // to compare dst against.
+ for (size_t i = 0; i < MEMMOVE_DATA_SIZE; i++) {
+ src_data[i] = (i + 1) % 255;
+ }
+
+ // Check all different dst offsets between 0 and 127 inclusive.
+ char* src = buffer;
+ for (size_t i = 0; i < 127; i++) {
+ char* dst = buffer + 256 + i;
+ // Small copy.
+ verify_memmove(src_data, dst, src, 1024);
+
+ // Medium copy.
+ verify_memmove(src_data, dst, src, 64 * 1024);
+
+ // Medium copy.
+ verify_memmove(src_data, dst, src, 1024 * 1024 + 128 * 1024);
+ }
+
+ // Check all leftover size offsets between 1 and 127 inclusive.
+ char* dst = buffer + 256;
+ src = buffer;
+ for (size_t size = 1; size < 127; size++) {
+ // Small copy.
+ verify_memmove(src_data, dst, src, 1024);
+
+ // Medium copy.
+ verify_memmove(src_data, dst, src, 64 * 1024);
+
+ // Large copy.
+ verify_memmove(src_data, dst, src, 1024 * 1024 + 128 * 1024);
+ }
+}
+
TEST(string, bcopy) {
StringTestState<char> state(LARGE);
for (size_t i = 0; i < state.n; i++) {
@@ -964,6 +1014,22 @@ TEST(string, memcpy_overread) {
RunSrcDstBufferOverreadTest(DoMemcpyTest);
}
+static void DoMemmoveTest(uint8_t* src, uint8_t* dst, size_t len) {
+ memset(src, (len % 255) + 1, len);
+ memset(dst, 0, len);
+
+ ASSERT_EQ(dst, memmove(dst, src, len));
+ ASSERT_TRUE(memcmp(src, dst, len) == 0);
+}
+
+TEST(string, memmove_align) {
+ RunSrcDstBufferAlignTest(LARGE, DoMemmoveTest);
+}
+
+TEST(string, memmove_overread) {
+ RunSrcDstBufferOverreadTest(DoMemmoveTest);
+}
+
static void DoMemsetTest(uint8_t* buf, size_t len) {
for (size_t i = 0; i < len; i++) {
buf[i] = 0;