author     Christopher Ferris <cferris@google.com>  2014-07-11 22:58:55 +0000
committer  Gerrit Code Review <noreply-gerritcodereview@google.com>  2014-07-10 20:49:23 +0000
commit     385bb1ededa734e88bafb8aaf817e9a984bf0d18 (patch)
tree       ac60c16b4dbbb393d3f9ddf2f862c4c740e48194
parent     96502e273eea49c95899af57fa578c8055b9618c (diff)
parent     8c20c13100d159ff505af9e6e19cab30f368a074 (diff)
Merge "Add optimized memchr implementation from newlib"
-rw-r--r--  libc/arch-arm64/arm64.mk                        1
-rw-r--r--  libc/arch-arm64/denver64/denver64.mk            1
-rw-r--r--  libc/arch-arm64/generic-neon/generic-neon.mk    1
-rw-r--r--  libc/arch-arm64/generic/bionic/memchr.S       160
-rw-r--r--  libc/arch-arm64/generic/generic.mk              1
5 files changed, 163 insertions(+), 1 deletion(-)
diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index 20d9cf1..314358e 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -1,7 +1,6 @@
# arm64 specific configs
libc_common_src_files_arm64 := \
- bionic/memchr.c \
bionic/memrchr.c \
bionic/strrchr.cpp \
upstream-freebsd/lib/libc/string/wcscat.c \
diff --git a/libc/arch-arm64/denver64/denver64.mk b/libc/arch-arm64/denver64/denver64.mk
index c6ddb3f..d619c11 100644
--- a/libc/arch-arm64/denver64/denver64.mk
+++ b/libc/arch-arm64/denver64/denver64.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm64 += \
+ arch-arm64/generic/bionic/memchr.S \
arch-arm64/generic/bionic/memcmp.S \
arch-arm64/denver64/bionic/memcpy.S \
arch-arm64/generic/bionic/memmove.S \
diff --git a/libc/arch-arm64/generic-neon/generic-neon.mk b/libc/arch-arm64/generic-neon/generic-neon.mk
index f3bde5c..77e3861 100644
--- a/libc/arch-arm64/generic-neon/generic-neon.mk
+++ b/libc/arch-arm64/generic-neon/generic-neon.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm64 += \
+ arch-arm64/generic/bionic/memchr.S \
arch-arm64/generic/bionic/memcmp.S \
arch-arm64/generic/bionic/memmove.S \
arch-arm64/generic/bionic/memset.S \
diff --git a/libc/arch-arm64/generic/bionic/memchr.S b/libc/arch-arm64/generic/bionic/memchr.S
new file mode 100644
index 0000000..fbb00ca
--- /dev/null
+++ b/libc/arch-arm64/generic/bionic/memchr.S
@@ -0,0 +1,160 @@
+/*
+ * memchr - find a character in a memory zone
+ *
+ * Copyright (c) 2014, ARM Limited
+ * All rights Reserved.
+ * Copyright (c) 2014, Linaro Ltd.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the company nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+#include <private/bionic_asm.h>
+
+/* Arguments and results. */
+#define srcin x0
+#define chrin w1
+#define cntin x2
+
+#define result x0
+
+#define src x3
+#define tmp x4
+#define wtmp2 w5
+#define synd x6
+#define soff x9
+#define cntrem x10
+
+#define vrepchr v0
+#define vdata1 v1
+#define vdata2 v2
+#define vhas_chr1 v3
+#define vhas_chr2 v4
+#define vrepmask v5
+#define vend v6
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
+ * per byte. For each tuple, bit 0 is set if the relevant byte matched the
+ * requested character and bit 1 is not used (faster than using a 32-bit
+ * syndrome). Since the bits in the syndrome reflect exactly the order in which
+ * things occur in the original string, counting trailing zeros allows us to
+ * identify exactly which byte has matched.
+ */
+
+ENTRY(memchr)
+ /*
+ * Magic constant 0x40100401 allows us to identify which lane matches
+ * the requested byte.
+ */
+ mov wtmp2, #0x0401
+ movk wtmp2, #0x4010, lsl #16
+ dup vrepchr.16b, chrin
+ /* Work with aligned 32-byte chunks */
+ bic src, srcin, #31
+ dup vrepmask.4s, wtmp2
+ ands soff, srcin, #31
+ and cntrem, cntin, #31
+ b.eq .Lloop
+
+ /*
+ * Input string is not 32-byte aligned. We calculate the syndrome
+ * value for the aligned 32 bytes block containing the first bytes
+ * and mask the irrelevant part.
+ */
+
+ ld1 {vdata1.16b, vdata2.16b}, [src], #32
+ sub tmp, soff, #32
+ adds cntin, cntin, tmp
+ cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
+ cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
+ and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+ and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+ addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
+ addp vend.16b, vend.16b, vend.16b /* 128->64 */
+ mov synd, vend.2d[0]
+ /* Clear the soff*2 lower bits */
+ lsl tmp, soff, #1
+ lsr synd, synd, tmp
+ lsl synd, synd, tmp
+ /* The first block can also be the last */
+ b.ls .Lmasklast
+ /* Have we found something already? */
+ cbnz synd, .Ltail
+
+.Lloop:
+ ld1 {vdata1.16b, vdata2.16b}, [src], #32
+ subs cntin, cntin, #32
+ cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
+ cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
+ /* If we're out of data we finish regardless of the result */
+ b.ls .Lend
+ /* Use a fast check for the termination condition */
+ orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
+ addp vend.2d, vend.2d, vend.2d
+ mov synd, vend.2d[0]
+ /* We're not out of data, loop if we haven't found the character */
+ cbz synd, .Lloop
+
+.Lend:
+ /* Termination condition found, let's calculate the syndrome value */
+ and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+ and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+ addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
+ addp vend.16b, vend.16b, vend.16b /* 128->64 */
+ mov synd, vend.2d[0]
+ /* Only do the clear for the last possible block */
+ b.hi .Ltail
+
+.Lmasklast:
+ /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
+ add tmp, cntrem, soff
+ and tmp, tmp, #31
+ sub tmp, tmp, #32
+ neg tmp, tmp, lsl #1
+ lsl synd, synd, tmp
+ lsr synd, synd, tmp
+
+.Ltail:
+ /* Count the trailing zeros using bit reversing */
+ rbit synd, synd
+ /* Compensate the last post-increment */
+ sub src, src, #32
+ /* Check that we have found a character */
+ cmp synd, #0
+ /* And count the leading zeros */
+ clz synd, synd
+ /* Compute the potential result */
+ add result, src, synd, lsr #1
+ /* Select result or NULL */
+ csel result, xzr, result, eq
+ ret
+END(memchr)
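
A note on the algorithm (not part of the change itself): the "Core algorithm" comment above describes a two-bits-per-byte syndrome. The portable C sketch below models what one 32-byte iteration computes; memchr_chunk_model is a hypothetical name used only for exposition, and the real code builds the same value with NEON compares, the 0x40100401 mask and pairwise adds instead of a byte loop.

/*
 * Illustrative sketch only, not code from this change. Byte i of a 32-byte
 * chunk owns the 2-bit tuple at bits [2*i, 2*i+1] of a 64-bit syndrome;
 * bit 0 of the tuple is set on a match and bit 1 stays clear. Counting
 * trailing zeros and halving gives the index of the first matching byte.
 */
#include <stddef.h>
#include <stdint.h>

static const void *memchr_chunk_model(const unsigned char *chunk,
                                      unsigned char c, size_t len)
{
    uint64_t synd = 0;

    /* The NEON code computes this with cmeq, an AND against the repeated
       0x40100401 constant and two pairwise adds; here it is spelled out
       byte by byte. */
    for (size_t i = 0; i < 32 && i < len; i++) {
        if (chunk[i] == c)
            synd |= 1ULL << (2 * i);
    }
    if (synd == 0)
        return NULL;                        /* no match in this chunk */

    /* The assembly uses rbit + clz, i.e. a trailing-zero count. */
    return chunk + (__builtin_ctzll(synd) >> 1);
}

Applied to each aligned 32-byte block, with the head and tail lanes masked off as the assembly does, this reproduces memchr's result.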
diff --git a/libc/arch-arm64/generic/generic.mk b/libc/arch-arm64/generic/generic.mk
index 878dcdf..1b595aa 100644
--- a/libc/arch-arm64/generic/generic.mk
+++ b/libc/arch-arm64/generic/generic.mk
@@ -1,4 +1,5 @@
libc_bionic_src_files_arm64 += \
+ arch-arm64/generic/bionic/memchr.S \
arch-arm64/generic/bionic/memcmp.S \
arch-arm64/generic/bionic/memcpy.S \
arch-arm64/generic/bionic/memmove.S \
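
The unaligned head and the final chunk are handled by masking syndrome bits rather than by byte loops. The sketch below models those two steps in the same illustrative C terms; mask_head and mask_tail are hypothetical helpers, not code from this change. mask_head mirrors the "Clear the soff*2 lower bits" step for bytes the aligned load picked up before srcin, and mask_tail mirrors the .Lmasklast step that discards lanes past the requested length.

#include <stdint.h>

/* Illustrative only: drop syndrome bits for the soff bytes that the aligned
   32-byte load covered before the real start of the buffer. */
static uint64_t mask_head(uint64_t synd, unsigned soff)
{
    unsigned shift = 2 * soff;              /* two syndrome bits per byte */
    return (synd >> shift) << shift;
}

/* Illustrative only: in the last chunk, keep just the (cntrem + soff) % 32
   byte lanes that are still inside the buffer and clear the rest. */
static uint64_t mask_tail(uint64_t synd, unsigned soff, unsigned cntrem)
{
    unsigned valid = (cntrem + soff) & 31;
    if (valid == 0)
        return synd;                        /* a 64-bit shift by 64 is
                                               undefined in C; the register
                                               shift in the assembly wraps
                                               to 0 and clears nothing */
    unsigned shift = 2 * (32 - valid);
    return (synd << shift) >> shift;
}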