From a27d2baa0c1a2ec70f47ea9199b1dd6762c8a349 Mon Sep 17 00:00:00 2001
From: The Android Open Source Project
Date: Tue, 21 Oct 2008 07:00:00 -0700
Subject: Initial Contribution

---
 libc/arch-x86/string/swab.S | 80 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 libc/arch-x86/string/swab.S

(limited to 'libc/arch-x86/string/swab.S')

diff --git a/libc/arch-x86/string/swab.S b/libc/arch-x86/string/swab.S
new file mode 100644
index 0000000..3055860
--- /dev/null
+++ b/libc/arch-x86/string/swab.S
@@ -0,0 +1,80 @@
+/*	$OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * On the i486, this code is negligibly faster than the code generated
+ * by gcc at about half the size.  If my i386 databook is correct, it
+ * should be considerably faster than the gcc code on a i386.
+ */
+
+/*
+ * swab: copy a buffer while exchanging the two bytes of each 16-bit word.
+ *
+ * Stack arguments (read after the two register saves below):
+ *   1st - source pointer, 2nd - destination pointer, 3rd - byte count.
+ * Only count/2 whole words are copied; a trailing odd byte is left alone.
+ * A count with the sign bit set copies nothing (POSIX: negative nbytes
+ * makes swab() do nothing).
+ * %esi and %edi are preserved; %eax, %ecx and the flags are clobbered.
+ */
+ENTRY(swab)
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%esi		# %esi = source pointer (1st argument)
+	movl	16(%esp),%edi		# %edi = destination pointer (2nd argument)
+	movl	20(%esp),%ecx		# %ecx = byte count (3rd argument)
+
+	testl	%ecx,%ecx		# a negative count must copy nothing;
+	js	L4			# shrl would turn it into a huge one
+
+	cld				# set direction forward
+
+	shrl	$1,%ecx			# bytes -> 16-bit words
+	testl	$7,%ecx			# copy first group of 1 to 7 words
+	jz	L2			# while swapping alternate bytes.
+	.align	2,0x90
+L1:	lodsw
+	rorw	$8,%ax
+	stosw
+	decl	%ecx
+	testl	$7,%ecx
+	jnz	L1
+
+L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
+	jz	L4			# while swapping alternate bytes.
+	.align	2,0x90
+L3:	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	decl	%ecx
+	jnz	L3
+
+L4:	popl	%edi
+	popl	%esi
+	ret
-- 
cgit v1.1