diff options
Diffstat (limited to 'libc/arch-x86/string/swab.S')
-rw-r--r-- | libc/arch-x86/string/swab.S | 67 |
1 files changed, 67 insertions, 0 deletions
/*
 * File: libc/arch-x86/string/swab.S (added as a new file, mode 100644).
 */
/*	$OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $	*/
/*
 * Written by J.T. Conklin <jtc@netbsd.org>.
 * Public domain.
 */

#include <machine/asm.h>

/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on a i386.
 */

/*
 * void swab(const void *src, void *dest, ssize_t nbytes)
 *
 * Copies nbytes/2 16-bit words from src to dest, exchanging the two
 * bytes of every word on the way.
 *
 * Syntax: GAS, AT&T operand order.  Target: 32-bit x86.
 * In:     arguments are read from the stack; after the two pushes
 *         below they sit at 12(%esp) = src, 16(%esp) = dest and
 *         20(%esp) = nbytes.
 * Out:    no return value.
 * Clobb:  %eax, %ecx, flags.  %esi and %edi are saved and restored.
 *
 * Edge cases:
 *   nbytes < 0        nothing is copied (POSIX requires a no-op).
 *   nbytes == 0 or 1  nothing is copied.
 *   nbytes odd        the trailing byte is not copied.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# esi = src
	movl	16(%esp),%edi		# edi = dest
	movl	20(%esp),%ecx		# ecx = nbytes (signed)

	/*
	 * The shift below is a logical one, so without this check a
	 * negative count would turn into a word count of about 2^31.
	 */
	testl	%ecx,%ecx
	js	.Lswab_done		# negative nbytes: do nothing

	cld				# set direction forward

	shrl	$1,%ecx			# ecx = number of whole words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	.Lswab_blocks		# while swapping alternate bytes.
	.align	2,0x90
.Lswab_head:
	lodsw
	rorw	$8,%ax			# exchange the two bytes of the word
	stosw
	decl	%ecx
	testl	$7,%ecx			# loop until the count is a multiple of 8
	jnz	.Lswab_head

.Lswab_blocks:
	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	.Lswab_done		# while swapping alternate bytes.
	.align	2,0x90
.Lswab_unrolled:
	.rept	8			/* one iteration = 8 swapped words */
	lodsw
	rorw	$8,%ax
	stosw
	.endr
	decl	%ecx			# ecx = blocks of 8 words remaining
	jnz	.Lswab_unrolled

.Lswab_done:
	popl	%edi
	popl	%esi
	ret