diff options
author | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2013-06-27 12:55:46 +0400 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2013-08-02 17:57:23 -0700 |
commit | baa91f4f8974b6e9a81fa3aa49f051b3bf823653 (patch) | |
tree | 5107a2ba56f4f360dd171abaa3f84e360ab0ab5b /libc/arch-x86 | |
parent | aa2733d17b87c607fccbd6e6a0f44d2d411ffd77 (diff) | |
download | bionic-baa91f4f8974b6e9a81fa3aa49f051b3bf823653.zip bionic-baa91f4f8974b6e9a81fa3aa49f051b3bf823653.tar.gz bionic-baa91f4f8974b6e9a81fa3aa49f051b3bf823653.tar.bz2 |
Add ssse3 implementation of __memcmp16.
__memcmp16 was missing in x86. Also added C-version for backward
compatibility. Added bionic test for __memcmp16 and for wmemcmp.
Change-Id: I33718441e7ee343cdb021d91dbeaf9ce2d4d7eb4
Signed-off-by: Alexander Ivchenko <alexander.ivchenko@intel.com>
Diffstat (limited to 'libc/arch-x86')
-rw-r--r-- | libc/arch-x86/string/ssse3-memcmp-atom.S | 344 | ||||
-rw-r--r-- | libc/arch-x86/string/ssse3-memcmp16-atom.S | 37 | ||||
-rw-r--r-- | libc/arch-x86/string/ssse3-wmemcmp-atom.S | 4 | ||||
-rw-r--r-- | libc/arch-x86/x86.mk | 2 |
4 files changed, 357 insertions, 30 deletions
diff --git a/libc/arch-x86/string/ssse3-memcmp-atom.S b/libc/arch-x86/string/ssse3-memcmp-atom.S index 30e3173..0387084 100644 --- a/libc/arch-x86/string/ssse3-memcmp-atom.S +++ b/libc/arch-x86/string/ssse3-memcmp-atom.S @@ -1,5 +1,5 @@ /* -Copyright (c) 2010, 2011 Intel Corporation +Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -106,9 +106,12 @@ name: \ ENTRY (MEMCMP) movl LEN(%esp), %ecx -#ifdef USE_AS_WMEMCMP +#ifdef USE_WCHAR shl $2, %ecx jz L(zero) +#elif defined USE_UTF16 + shl $1, %ecx + jz L(zero) #endif movl BLK1(%esp), %eax @@ -116,7 +119,7 @@ ENTRY (MEMCMP) movl BLK2(%esp), %edx jae L(48bytesormore) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $1, %ecx jbe L(less1bytes) #endif @@ -128,7 +131,7 @@ ENTRY (MEMCMP) CFI_POP (%ebx) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) .p2align 4 L(less1bytes): jb L(zero) @@ -174,7 +177,7 @@ L(48bytesormore): jz L(shr_0) xor %edx, %esi -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $8, %edx jae L(next_unaligned_table) cmp $0, %edx @@ -210,7 +213,7 @@ L(next_unaligned_table): cmp $14, %edx je L(shr_14) jmp L(shr_15) -#else +#elif defined(USE_WCHAR) cmp $0, %edx je L(shr_0) cmp $4, %edx @@ -218,6 +221,22 @@ L(next_unaligned_table): cmp $8, %edx je L(shr_8) jmp L(shr_12) +#elif defined(USE_UTF16) + cmp $0, %edx + je L(shr_0) + cmp $2, %edx + je L(shr_2) + cmp $4, %edx + je L(shr_4) + cmp $6, %edx + je L(shr_6) + cmp $8, %edx + je L(shr_8) + cmp $10, %edx + je L(shr_10) + cmp $12, %edx + je L(shr_12) + jmp L(shr_14) #endif .p2align 4 @@ -289,7 +308,7 @@ L(shr_0_gobble_loop_next): POP (%esi) jmp L(less48bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -372,8 +391,10 @@ L(shr_1_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 @@ -456,7 +477,9 @@ L(shr_2_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -624,7 +647,7 @@ L(shr_4_gobble_next): POP (%esi) jmp L(less48bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -707,7 +730,9 @@ L(shr_5_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 @@ -790,7 +815,9 @@ L(shr_6_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -958,7 +985,7 @@ L(shr_8_gobble_next): POP (%esi) jmp L(less48bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -1041,7 +1068,9 @@ L(shr_9_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 @@ -1124,7 +1153,9 @@ L(shr_10_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -1292,7 +1323,7 @@ L(shr_12_gobble_next): POP (%esi) jmp L(less48bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -1375,7 +1406,9 @@ L(shr_13_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 @@ -1458,7 +1491,9 @@ L(shr_14_gobble_next): POP (%edi) POP (%esi) jmp L(less48bytes) +#endif +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 @@ -1558,7 +1593,7 @@ L(first16bytes): add %eax, %esi L(less16bytes): -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) test %dl, %dl jz L(next_24_bytes) @@ -1668,7 +1703,7 @@ L(Byte31): movzbl -9(%esi), %edx sub %edx, %eax RETURN_END -#else +#elif defined(USE_AS_WMEMCMP) /* special for wmemcmp */ test %dl, %dl @@ -1682,7 +1717,6 @@ L(Byte31): neg %eax RETURN - .p2align 4 L(second_double_word): mov -12(%edi), %ecx @@ -1691,7 +1725,7 @@ L(second_double_word): jg L(nequal_bigger) neg %eax RETURN - + .p2align 4 L(next_two_double_words): and $15, %dh @@ -1715,6 +1749,79 @@ L(fourth_double_word): .p2align 4 L(nequal_bigger): RETURN_END + +#elif defined(USE_AS_MEMCMP16) + +/* special for __memcmp16 */ + test %dl, %dl + jz L(next_four_words) + test $15, %dl + jz L(second_two_words) + test $3, %dl + jz L(second_word) + movzwl -16(%edi), %eax + movzwl -16(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(second_word): + movzwl -14(%edi), %eax + movzwl -14(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(second_two_words): + test $63, %dl + jz L(fourth_word) + movzwl -12(%edi), %eax + movzwl -12(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(fourth_word): + movzwl -10(%edi), %eax + movzwl -10(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(next_four_words): + test $15, %dh + jz L(fourth_two_words) + test $3, %dh + jz L(sixth_word) + movzwl -8(%edi), %eax + movzwl -8(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(sixth_word): + movzwl -6(%edi), %eax + movzwl -6(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(fourth_two_words): + test $63, %dh + jz L(eighth_word) + movzwl -4(%edi), %eax + movzwl -4(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 +L(eighth_word): + movzwl -2(%edi), %eax + movzwl -2(%esi), %ebx + subl %ebx, %eax + RETURN +#else +# error Unreachable preprocessor case #endif CFI_PUSH (%ebx) @@ -1725,7 +1832,7 @@ L(more8bytes): jae L(more16bytes) cmp $8, %ecx je L(8bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $9, %ecx je L(9bytes) cmp $10, %ecx @@ -1739,8 +1846,16 @@ L(more8bytes): cmp $14, %ecx je L(14bytes) jmp L(15bytes) -#else +#elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(12bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $10, %ecx + je L(10bytes) + cmp $12, %ecx + je L(12bytes) + jmp L(14bytes) +#else +# error Unreachable preprocessor case #endif .p2align 4 @@ -1749,7 +1864,7 @@ L(more16bytes): jae L(more24bytes) cmp $16, %ecx je L(16bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $17, %ecx je L(17bytes) cmp $18, %ecx @@ -1763,8 +1878,16 @@ L(more16bytes): cmp $22, %ecx je L(22bytes) jmp L(23bytes) -#else +#elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(20bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $18, %ecx + je L(18bytes) + cmp $20, %ecx + je L(20bytes) + jmp L(22bytes) +#else +# error Unreachable preprocessor case #endif .p2align 4 @@ -1773,7 +1896,7 @@ L(more24bytes): jae L(more32bytes) cmp $24, %ecx je L(24bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $25, %ecx je L(25bytes) cmp $26, %ecx @@ -1787,8 +1910,16 @@ L(more24bytes): cmp $30, %ecx je L(30bytes) jmp L(31bytes) -#else +#elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(28bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $26, %ecx + je L(26bytes) + cmp $28, %ecx + je L(28bytes) + jmp L(30bytes) +#else +# error Unreachable preprocessor case #endif .p2align 4 @@ -1797,7 +1928,7 @@ L(more32bytes): jae L(more40bytes) cmp $32, %ecx je L(32bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $33, %ecx je L(33bytes) cmp $34, %ecx @@ -1811,15 +1942,23 @@ L(more32bytes): cmp $38, %ecx je L(38bytes) jmp L(39bytes) -#else +#elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(36bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $34, %ecx + je L(34bytes) + cmp $36, %ecx + je L(36bytes) + jmp L(38bytes) +#else +# error Unreachable preprocessor case #endif .p2align 4 L(less48bytes): cmp $8, %ecx jae L(more8bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $2, %ecx je L(2bytes) cmp $3, %ecx @@ -1831,15 +1970,23 @@ L(less48bytes): cmp $6, %ecx je L(6bytes) jmp L(7bytes) -#else +#elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(4bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $2, %ecx + je L(2bytes) + cmp $4, %ecx + je L(4bytes) + jmp L(6bytes) +#else +# error Unreachable preprocessor case #endif .p2align 4 L(more40bytes): cmp $40, %ecx je L(40bytes) -#ifndef USE_AS_WMEMCMP +#if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $41, %ecx je L(41bytes) cmp $42, %ecx @@ -1853,7 +2000,15 @@ L(more40bytes): cmp $46, %ecx je L(46bytes) jmp L(47bytes) +#elif defined(USE_UTF16) && !defined(USE_WCHAR) + cmp $42, %ecx + je L(42bytes) + cmp $44, %ecx + je L(44bytes) + jmp L(46bytes) +#endif +#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16) .p2align 4 L(44bytes): mov -44(%eax), %ecx @@ -1914,7 +2069,8 @@ L(4bytes): POP (%ebx) ret CFI_PUSH (%ebx) -#else +#elif defined(USE_AS_WMEMCMP) + .p2align 4 L(44bytes): mov -44(%eax), %ecx @@ -1964,9 +2120,131 @@ L(4bytes): POP (%ebx) ret CFI_PUSH (%ebx) +#elif defined USE_AS_MEMCMP16 + + .p2align 4 +L(46bytes): + movzwl -46(%eax), %ecx + movzwl -46(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(44bytes): + movzwl -44(%eax), %ecx + movzwl -44(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(42bytes): + movzwl -42(%eax), %ecx + movzwl -42(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(40bytes): + movzwl -40(%eax), %ecx + movzwl -40(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(38bytes): + movzwl -38(%eax), %ecx + movzwl -38(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(36bytes): + movzwl -36(%eax), %ecx + movzwl -36(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(34bytes): + movzwl -34(%eax), %ecx + movzwl -34(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(32bytes): + movzwl -32(%eax), %ecx + movzwl -32(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(30bytes): + movzwl -30(%eax), %ecx + movzwl -30(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(28bytes): + movzwl -28(%eax), %ecx + movzwl -28(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(26bytes): + movzwl -26(%eax), %ecx + movzwl -26(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(24bytes): + movzwl -24(%eax), %ecx + movzwl -24(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(22bytes): + movzwl -22(%eax), %ecx + movzwl -22(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(20bytes): + movzwl -20(%eax), %ecx + movzwl -20(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(18bytes): + movzwl -18(%eax), %ecx + movzwl -18(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(16bytes): + movzwl -16(%eax), %ecx + movzwl -16(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(14bytes): + movzwl -14(%eax), %ecx + movzwl -14(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(12bytes): + movzwl -12(%eax), %ecx + movzwl -12(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(10bytes): + movzwl -10(%eax), %ecx + movzwl -10(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(8bytes): + movzwl -8(%eax), %ecx + movzwl -8(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(6bytes): + movzwl -6(%eax), %ecx + movzwl -6(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(4bytes): + movzwl -4(%eax), %ecx + movzwl -4(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) +L(2bytes): + movzwl -2(%eax), %eax + movzwl -2(%edx), %ebx + subl %ebx, %eax + POP (%ebx) + ret + CFI_PUSH (%ebx) +#else +# error Unreachable preprocessor case #endif -#ifndef USE_AS_WMEMCMP +#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16) .p2align 4 L(45bytes): @@ -2191,9 +2469,8 @@ L(end): neg %eax L(bigger): ret -#else +#elif defined(USE_AS_WMEMCMP) -/* for wmemcmp */ .p2align 4 L(find_diff): POP (%ebx) @@ -2206,5 +2483,14 @@ L(find_diff): L(find_diff_bigger): ret +#elif defined(USE_AS_MEMCMP16) + + .p2align 4 +L(memcmp16_exit): + POP (%ebx) + mov %ecx, %eax + ret +#else +# error Unreachable preprocessor case #endif END (MEMCMP) diff --git a/libc/arch-x86/string/ssse3-memcmp16-atom.S b/libc/arch-x86/string/ssse3-memcmp16-atom.S new file mode 100644 index 0000000..1be8f3d --- /dev/null +++ b/libc/arch-x86/string/ssse3-memcmp16-atom.S @@ -0,0 +1,37 @@ +/* +Copyright (c) 2013, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define MEMCMP __memcmp16 + +/* int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n); */ + +#define USE_UTF16 +#define USE_AS_MEMCMP16 1 +#include "ssse3-memcmp-atom.S" diff --git a/libc/arch-x86/string/ssse3-wmemcmp-atom.S b/libc/arch-x86/string/ssse3-wmemcmp-atom.S index c146b04..2c3fa02 100644 --- a/libc/arch-x86/string/ssse3-wmemcmp-atom.S +++ b/libc/arch-x86/string/ssse3-wmemcmp-atom.S @@ -1,5 +1,5 @@ /* -Copyright (c) 2011, Intel Corporation +Copyright (c) 2011, 2012, 2013 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -29,5 +29,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MEMCMP wmemcmp + +#define USE_WCHAR #define USE_AS_WMEMCMP 1 #include "ssse3-memcmp-atom.S" diff --git a/libc/arch-x86/x86.mk b/libc/arch-x86/x86.mk index 3924ce3..19c1402 100644 --- a/libc/arch-x86/x86.mk +++ b/libc/arch-x86/x86.mk @@ -27,6 +27,7 @@ _LIBC_ARCH_COMMON_SRC_FILES += \ arch-x86/string/ssse3-strcpy-atom.S \ arch-x86/string/ssse3-memcmp-atom.S \ arch-x86/string/ssse3-wmemcmp-atom.S \ + arch-x86/string/ssse3-memcmp16-atom.S \ arch-x86/string/ssse3-wcscat-atom.S \ arch-x86/string/ssse3-wcscpy-atom.S else @@ -38,6 +39,7 @@ _LIBC_ARCH_COMMON_SRC_FILES += \ arch-x86/string/strncmp.S \ arch-x86/string/strcat.S \ arch-x86/string/memcmp.S \ + string/memcmp16.c \ string/strcpy.c \ string/strncat.c \ string/strncpy.c \ |