/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax: GNU as, AT&T operand order (op src, dst), preprocessed by cpp.
 * Target: x86-64 with SSE2.
 */

/* L(name) expands to an assembler-local label (.Lname). */
#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY/END: declare a global, 16-byte aligned function symbol with CFI
   bracketing and a .size record.  */
#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

/* Exported symbol name; a wrapper may pre-define it.  */
#ifndef STRLCPY
# define STRLCPY	strlcpy
#endif

/* Jump-table entries are 32-bit offsets of the target from the table base.  */
#define JMPTBL(I, B)	I - B

/* Indirect jump through a table of JMPTBL offsets.
   Clobbers %r11 (table base) and %rcx (target address).  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	lea	TABLE(%rip), %r11; \
	movslq	(%r11, INDEX, SCALE), %rcx; \
	lea	(%r11, %rcx), %rcx; \
	jmp	*%rcx

/* Common epilogue: the result is the running count in %rax plus the
   amount accumulated in %r9.  */
#define RETURN \
	add	%r9, %rax; \
	ret

/*
 * size_t STRLCPY (char *dst, const char *src, size_t size)
 *
 * Argument registers follow the System V AMD64 convention:
 *   In:   %rdi = dst, %rsi = src, %rdx = size
 *   Out:  %rax (see RETURN)
 *   Clobbers: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0-%xmm2, flags.
 *   No stack is used and no callee-saved register is touched.
 *
 * Register roles inside the body:
 *   %r8   remaining size budget (adjusted as chunks are consumed)
 *   %r9   length accumulated so far, added to %rax by RETURN
 *   %rax  running byte count / offset into the current aligned stream
 *   %rcx  misalignment (address & 15) of the pointer being streamed
 *   %rdx  byte mask from pmovmskb, then the index produced by bsf
 *   %xmm0 zero / compare result; it stays all-zero for as long as no
 *         terminator has been seen, which the loops rely on
 *
 * Exits:
 *   ExitStringTailTable[k] -> L(StringTail<k>): terminator found at index k;
 *       copy k + 1 bytes (terminator included) and return.
 *   ExitTable[n] -> L(Exit<n>): size budget of n bytes reached first; copy
 *       n - 1 bytes, store a NUL, then continue in L(CalculateSrcLen) to
 *       measure the rest of src for the return value.
 *
 * When USE_AS_STRLCAT is defined, a scan for the end of dst (bounded by
 * size) runs before the copy.
 */
	.text
ENTRY (STRLCPY)
	xor	%rax, %rax
	xor	%r9, %r9
	mov	%rdx, %r8		/* r8 = size */
	test	%r8, %r8
	jz	L(CalculateSrcLen)	/* size == 0: nothing stored, only measure src */

#ifdef USE_AS_STRLCAT
	/* Locate the terminator of dst, examining at most size bytes.  */
	xor	%rcx, %rcx
	pxor	%xmm0, %xmm0
	/* NOTE(review): this load and the 16(%rdi) load below are unaligned and
	   are issued without any page-boundary check on dst -- confirm that a
	   dst buffer ending close to an unmapped page cannot fault here.  */
	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$17, %r8
	jb	L(SizeEndCase1)		/* size <= 16 */
	test	%rdx, %rdx
	jnz	L(StringEndCase1)

	add	$16, %rax
	movdqu	16(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$33, %r8
	jb	L(SizeEndCase1)		/* size <= 32 */
	test	%rdx, %rdx
	jnz	L(StringEndCase1)

	/* Continue with aligned 16-byte loads.  */
	mov	%rdi, %rcx
	and	$15, %rcx
	and	$-16, %rdi

	add	%rcx, %r8
	sub	$16, %r8

L(DstLenLoop):
	movdqa	(%rdi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	sub	$16, %r8
	jbe	L(SizeEndCase2)		/* budget ends inside this block */
	test	%rdx, %rdx
	jnz	L(StringEndCase2)
	add	$16, %rax
	jmp	L(DstLenLoop)

	/* Terminator of dst found in the aligned loop with budget left.  */
L(StringEndCase2):
	add	$16, %r8
	bsf	%rdx, %rdx
	sub	%rdx, %r8
	add	%rdx, %rax
	sub	%rcx, %r9
	add	%rax, %rdi
	jmp	L(CopySrcString)

	/* Budget ends within the first one or two unaligned blocks.  */
L(SizeEndCase1):
	test	%rdx, %rdx
	jz	L(SizeEnd)
	bsf	%rdx, %rdx
	add	%rdx, %rax
	cmp	%r8, %rax
	jb	L(StringEnd)		/* terminator lies inside the budget */
L(SizeEnd):
	/* No terminator within size bytes of dst: r9 = size, then only
	   the length of src is added.  */
	mov	%r8, %r9
	jmp	L(CalculateSrcLenCase1)

	/* Budget ends inside an aligned block of the loop.  */
L(SizeEndCase2):
	add	$16, %r8
	test	%rdx, %rdx
	jz	L(StringEndCase4)
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(StringEndCase3)	/* terminator lies inside the budget */
L(StringEndCase4):
	add	%r8, %rax
	sub	%rcx, %rax
	mov	%rax, %r9
	jmp	L(CalculateSrcLenCase1)

L(StringEndCase3):
	add	%rdx, %rax
	sub	%rcx, %r9
	add	%rax, %rdi
	sub	%rdx, %r8
	jmp	L(CopySrcString)

L(StringEndCase1):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	sub	%rcx, %rax
L(StringEnd):
	add	%rax, %rdi		/* rdi = address of dst's terminator */
	sub	%rax, %r8		/* r8 = budget left after dst's contents */
#endif

	/* Pick the start-up sequence from src's offset inside a 64-byte
	   line: with an offset of at most 32, two unaligned 16-byte loads
	   stay inside that line.  */
	mov	%rsi, %rcx
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(CopySrcString)

	/* Otherwise inspect src through aligned loads and shift away the
	   mask bits that belong to bytes in front of src.  */
	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx
	mov	$16, %r10
	sub	%rcx, %r10		/* r10 = src bytes covered by this block */
	cmp	%r10, %r8
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	/* The first 16 bytes of src hold no terminator: store them.  */
	movdqu	(%rsi, %rcx), %xmm1
	movdqu	%xmm1, (%rdi)
#ifdef USE_AS_STRLCAT
	add	%rax, %r9
#endif
	jmp	L(LoopStart)

	.p2align 4
L(CopySrcString):
#ifdef USE_AS_STRLCAT
	add	%rax, %r9
	xor	%rax, %rax
#endif
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$17, %r8
	jb	L(CopyFrom1To16BytesTail1Case2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx
	add	$16, %rax

	cmp	$33, %r8
	jb	L(CopyFrom1To32Bytes1Case2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	mov	%rsi, %rcx
	and	$15, %rcx
	and	$-16, %rsi

L(LoopStart):
	/* rsi is 16-byte aligned from here on.  rdi is moved back by the same
	   misalignment so that one offset (rax) indexes both streams.  */
	sub	%rcx, %rdi
	add	%rcx, %r8
	sub	$16, %r8
	mov	$16, %rax

L(16Loop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* budget ends in this block */
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesXmmExit)		/* terminator in this block */
	movdqu	%xmm1, (%rdi, %rax)
	add	$16, %rax
	jmp	L(16Loop)

/*------End of main part with loops---------------------*/

/* Case1: the terminator was found and the size budget was not reached in
   the block under test.  */

	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %rsi
	add	$16, %rdi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesExit):
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

/* Case2: the block under test holds the terminator and is also where the
   size budget ends.  A terminator index below the remaining budget copies
   the whole tail; otherwise the copy is truncated through ExitTable.  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rax, %rdi
	add	%rax, %rsi
	bsf	%rdx, %rdx
	sub	%rcx, %rax
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

/* Case2 or Case3: the size budget ends in the block under test.  A
   non-zero mask means Case2; Case3 (no terminator in the block) always
   truncates through ExitTable.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
	add	$16, %r8
	add	%rax, %rdi
	add	%rax, %rsi
	add	%r8, %rax
	sub	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsf	%rdx, %rdx
	add	%rax, %rdi
	add	%rax, %rsi
	add	%rdx, %rax
	sub	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/

/* Truncating exits.  L(Exit<n>) copies n - 1 bytes from (%rsi) to (%rdi)
   using possibly overlapping moves, stores a NUL at offset n - 1 and
   continues in L(CalculateSrcLen), which advances %rsi by %r8.  */

	.p2align 4
L(Exit0):
	RETURN

	.p2align 4
L(Exit1):
	movb	$0, (%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit2):
	movb	(%rsi), %dh
	movb	%dh, (%rdi)
	movb	$0, 1(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit3):
	movw	(%rsi), %dx
	movw	%dx, (%rdi)
	movb	$0, 2(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit4):
	movw	(%rsi), %cx
	movb	2(%rsi), %dh
	movw	%cx, (%rdi)
	movb	%dh, 2(%rdi)
	movb	$0, 3(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit5):
	movl	(%rsi), %edx
	movl	%edx, (%rdi)
	movb	$0, 4(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit6):
	movl	(%rsi), %ecx
	movb	4(%rsi), %dh
	movl	%ecx, (%rdi)
	movb	%dh, 4(%rdi)
	movb	$0, 5(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit7):
	movl	(%rsi), %ecx
	movw	4(%rsi), %dx
	movl	%ecx, (%rdi)
	movw	%dx, 4(%rdi)
	movb	$0, 6(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit8):
	movl	(%rsi), %ecx
	movl	3(%rsi), %edx
	movl	%ecx, (%rdi)
	movl	%edx, 3(%rdi)
	movb	$0, 7(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit9):
	movq	(%rsi), %rdx
	movq	%rdx, (%rdi)
	movb	$0, 8(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit10):
	movq	(%rsi), %rcx
	movb	8(%rsi), %dh
	movq	%rcx, (%rdi)
	movb	%dh, 8(%rdi)
	movb	$0, 9(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit11):
	movq	(%rsi), %rcx
	movw	8(%rsi), %dx
	movq	%rcx, (%rdi)
	movw	%dx, 8(%rdi)
	movb	$0, 10(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit12):
	movq	(%rsi), %rcx
	movl	7(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 7(%rdi)
	movb	$0, 11(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit13):
	movq	(%rsi), %rcx
	movl	8(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 8(%rdi)
	movb	$0, 12(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit14):
	movq	(%rsi), %rcx
	movq	5(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 5(%rdi)
	movb	$0, 13(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit15):
	movq	(%rsi), %rcx
	movq	6(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 6(%rdi)
	movb	$0, 14(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit16):
	movq	(%rsi), %rcx
	movq	7(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 7(%rdi)
	movb	$0, 15(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	movb	$0, 16(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	movb	16(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movb	%dh, 16(%rdi)
	movb	$0, 17(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	movw	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movw	%cx, 16(%rdi)
	movb	$0, 18(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	movl	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 15(%rdi)
	movb	$0, 19(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	$0, 20(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movb	20(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	%dh, 20(%rdi)
	movb	$0, 21(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	movq	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 14(%rdi)
	movb	$0, 22(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	movq	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 15(%rdi)
	movb	$0, 23(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	movb	$0, 24(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movb	24(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
	movb	$0, 25(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movw	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movw	%cx, 24(%rdi)
	movb	$0, 26(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 23(%rdi)
	movb	$0, 27(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 24(%rdi)
	movb	$0, 28(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	movb	$0, 29(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	movb	$0, 30(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	movb	$0, 31(%rdi)
	jmp	L(CalculateSrcLen)

/* Complete-copy exits.  L(StringTail<k>) copies k + 1 bytes (the byte at
   index k is the terminator located by bsf) and returns.  */

	.p2align 4
L(StringTail0):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
	RETURN

	.p2align 4
L(StringTail1):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
	RETURN

	.p2align 4
L(StringTail2):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
	RETURN

	.p2align 4
L(StringTail3):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
	RETURN

	.p2align 4
L(StringTail4):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail6):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
	RETURN

	.p2align 4
L(StringTail7):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
	RETURN

	.p2align 4
L(StringTail8):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail10):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail11):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail12):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
	RETURN

	.p2align 4
L(StringTail13):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
	RETURN

	.p2align 4
L(StringTail14):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail15):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	RETURN

	.p2align 4
L(StringTail16):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail18):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail19):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
	RETURN

	.p2align 4
L(StringTail21):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail22):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail23):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cl, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
	RETURN

	.p2align 4
L(StringTail27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail28):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	RETURN

	.p2align 4
L(StringTail29):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail30):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail31):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cl, 32(%rdi)
	RETURN

	.p2align 4
L(StringTail33):
	/* Copies 34 bytes, in line with the k + 1 rule of the other tails.
	   NOTE(review): the dispatch sites in this file do not appear to
	   produce an index above 31, so this entry is presumably unused.  */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cx, 32(%rdi)
	RETURN

/* Measure what is left of src for the return value.
   On entry: %rsi + %r8 = first source byte not yet accounted for,
   %rax = count so far, %r9 = accumulated length.
   Returns %r9 + %rax + (distance from %rsi + %r8 to src's terminator).  */

	.p2align 4
L(CalculateSrcLenCase1):
	xor	%r8, %r8
	xor	%rax, %rax
L(CalculateSrcLen):
	pxor	%xmm0, %xmm0
	add	%r8, %rsi
	/* Look at the aligned 16-byte block containing the start address
	   rather than doing an unaligned load from it: an aligned block never
	   straddles a page, so bytes beyond the terminator on a following
	   (possibly unmapped) page are never touched.  */
	mov	%rsi, %rcx
	and	$15, %rcx		/* rcx = misalignment of the start */
	and	$-16, %rsi
	pcmpeqb	(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
	shr	%cl, %rdx		/* drop matches located before the start */
	test	%rdx, %rdx
	jnz	L(SrcLenFirstBlockEnd)

	add	%rax, %r9
	mov	$16, %rax
	/* Zero bytes in front of the start may have left set bytes in xmm0;
	   the loop needs it to be all-zero.  */
	pxor	%xmm0, %xmm0
L(SrcLenLoop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	test	%rdx, %rdx
	jnz	L(SrcLenLoopEnd)
	add	$16, %rax
	jmp	L(SrcLenLoop)

	.p2align 4
L(SrcLenLoopEnd):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	sub	%rcx, %rax		/* offsets were taken from the aligned base */
	RETURN

	.p2align 4
L(SrcLenFirstBlockEnd):
	/* The mask was already shifted, so the bit index is the distance
	   from the start address.  */
	bsf	%rdx, %rdx
	add	%rdx, %rax
	RETURN

END (STRLCPY)

	.section .rodata
	.p2align 4
L(ExitTable):
	.int	JMPTBL(L(Exit0), L(ExitTable))
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
L(ExitStringTailTable):
	.int	JMPTBL(L(StringTail0), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail1), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail2), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail3), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail4), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail5), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail6), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail7), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail8), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail9), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail10), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail11), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail12), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail13), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail14), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail15), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail16), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail17), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail18), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail19), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail20), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail21), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail22), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail23), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail24), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail25), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail26), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail27), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail28), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail29), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail30), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail31), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail32), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail33), L(ExitStringTailTable))