diff options
author | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-12-12 14:59:47 +0100 |
---|---|---|
committer | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-12-12 14:59:47 +0100 |
commit | 73049dc117da1f669fa7cee4004e69b3493a58c9 (patch) | |
tree | 37dafd236bb27b1c23f59c7ad41655212f7c0902 /libc/arch-arm64 | |
parent | 385816ce486622d72a8e56443b53ff8404d4ae3b (diff) | |
parent | 92595081575c82ace07201a3ea32004eba968c0b (diff) | |
download | bionic-replicant-6.0.zip bionic-replicant-6.0.tar.gz bionic-replicant-6.0.tar.bz2 |
Merge branch 'cm-13.0' of https://github.com/CyanogenMod/android_bionic into replicant-6.0replicant-6.0-beta-0001replicant-6.0-alpha-0006replicant-6.0
Diffstat (limited to 'libc/arch-arm64')
-rw-r--r-- | libc/arch-arm64/generic/bionic/strrchr.S | 159 | ||||
-rw-r--r-- | libc/arch-arm64/kryo/bionic/memcpy_base.S | 4 |
2 files changed, 23 insertions, 140 deletions
diff --git a/libc/arch-arm64/generic/bionic/strrchr.S b/libc/arch-arm64/generic/bionic/strrchr.S index 46b5031..409bc71 100644 --- a/libc/arch-arm64/generic/bionic/strrchr.S +++ b/libc/arch-arm64/generic/bionic/strrchr.S @@ -25,147 +25,30 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * Neon Available. - */ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ #include <private/bionic_asm.h> -/* Arguments and results. */ -#define srcin x0 -#define chrin w1 - -#define result x0 - -#define src x2 -#define tmp1 x3 -#define wtmp2 w4 -#define tmp3 x5 -#define src_match x6 -#define src_offset x7 -#define const_m1 x8 -#define tmp4 x9 -#define nul_match x10 -#define chr_match x11 - -#define vrepchr v0 -#define vdata1 v1 -#define vdata2 v2 -#define vhas_nul1 v3 -#define vhas_nul2 v4 -#define vhas_chr1 v5 -#define vhas_chr2 v6 -#define vrepmask_0 v7 -#define vrepmask_c v16 -#define vend1 v17 -#define vend2 v18 - -/* Core algorithm. - - For each 32-byte hunk we calculate a 64-bit syndrome value, with - two bits per byte (LSB is always in bits 0 and 1, for both big - and little-endian systems). For each tuple, bit 0 is set iff - the relevant byte matched the requested character; bit 1 is set - iff the relevant byte matched the NUL end of string (we trigger - off bit0 for the special case of looking for NUL). Since the bits - in the syndrome reflect exactly the order in which things occur - in the original string a count_trailing_zeros() operation will - identify exactly which byte is causing the termination, and why. */ - -/* Locals and temporaries. */ - +/* + * Find the last occurrence of a character in a string. + * + * Parameters: + * x0 - str + * x1 - c + * Returns: + * x0 - address of last occurrence of 'c' or 0 + */ ENTRY(strrchr) - /* Magic constant 0x40100401 to allow us to identify which lane - matches the requested byte. Magic constant 0x80200802 used - similarly for NUL termination. */ - mov wtmp2, #0x0401 - movk wtmp2, #0x4010, lsl #16 - dup vrepchr.16b, chrin - bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ - dup vrepmask_c.4s, wtmp2 - mov src_offset, #0 - ands tmp1, srcin, #31 - add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ - b.eq .Laligned - - /* Input string is not 32-byte aligned. Rather than forcing - the padding bytes to a safe value, we calculate the syndrome - for all the bytes, but then mask off those bits of the - syndrome that are related to the padding. */ - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - neg tmp1, tmp1 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b - and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b - addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64 - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 - mov nul_match, vhas_nul1.2d[0] - lsl tmp1, tmp1, #1 - mov const_m1, #~0 - mov chr_match, vhas_chr1.2d[0] - lsr tmp3, const_m1, tmp1 - - bic nul_match, nul_match, tmp3 // Mask padding bits. - bic chr_match, chr_match, tmp3 // Mask padding bits. - cbnz nul_match, .Ltail - -.Lloop: - cmp chr_match, #0 - csel src_match, src, src_match, ne - csel src_offset, chr_match, src_offset, ne -.Laligned: - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 - and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - addp vend1.16b, vend1.16b, vend1.16b // 128->64 - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 - mov nul_match, vend1.2d[0] - mov chr_match, vhas_chr1.2d[0] - cbz nul_match, .Lloop - - and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b - and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b - addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b - addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b - mov nul_match, vhas_nul1.2d[0] - -.Ltail: - /* Work out exactly where the string ends. */ - sub tmp4, nul_match, #1 - eor tmp4, tmp4, nul_match - ands chr_match, chr_match, tmp4 - /* And pick the values corresponding to the last match. */ - csel src_match, src, src_match, ne - csel src_offset, chr_match, src_offset, ne - - /* Count down from the top of the syndrome to find the last match. */ - clz tmp3, src_offset - /* Src_match points beyond the word containing the match, so we can - simply subtract half the bit-offset into the syndrome. Because - we are counting down, we need to go back one more character. */ - add tmp3, tmp3, #2 - sub result, src_match, tmp3, lsr #1 - /* But if the syndrome shows no match was found, then return NULL. */ - cmp src_offset, #0 - csel result, result, xzr, ne - + mov x3, #0 + and w1, w1, #0xff +1: ldrb w2, [x0], #1 + cbz w2, 2f + cmp w2, w1 + b.ne 1b + sub x3, x0, #1 + b 1b +2: mov x0, x3 ret - END(strrchr) diff --git a/libc/arch-arm64/kryo/bionic/memcpy_base.S b/libc/arch-arm64/kryo/bionic/memcpy_base.S index 0096bb7..ee0757d 100644 --- a/libc/arch-arm64/kryo/bionic/memcpy_base.S +++ b/libc/arch-arm64/kryo/bionic/memcpy_base.S @@ -170,8 +170,8 @@ kryo_bb_prime_pump: add x10, x1, #(PLDOFFS*PLDSIZE) bic x10, x10, #0x7F sub x12, x12, #PLDOFFS - prfm PLDL1KEEP, [x10, #(-1*PLDSIZE)] - prfm PLDL1KEEP, [x10, #(-1*PLDSIZE+64)] + prfm PLDL1KEEP, [x10, #((PLDOFFS-1)*PLDSIZE)] + prfm PLDL1KEEP, [x10, #((PLDOFFS-1)*PLDSIZE)+64] cmp x12, #(448*1024/128) bhi kryo_bb_copy_128_loop_ddr |