diff options
author | Chris Lattner <sabre@nondot.org> | 2009-02-04 19:09:07 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-02-04 19:09:07 +0000 |
commit | 8e041027074f55c333c3579640ac48c5601a5a7c (patch) | |
tree | 413415a858893dcc7330e8ea8246685242c7431d /lib/Target/X86/README-SSE.txt | |
parent | 34240c53edb0b63ce0db6f98d7bc19a9e7453276 (diff) | |
download | external_llvm-8e041027074f55c333c3579640ac48c5601a5a7c.zip external_llvm-8e041027074f55c333c3579640ac48c5601a5a7c.tar.gz external_llvm-8e041027074f55c333c3579640ac48c5601a5a7c.tar.bz2 |
Bill implemented this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@63752 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 36 |
1 files changed, 0 insertions, 36 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 67cad42..71ad51c 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -841,42 +841,6 @@ _t: movaps %xmm0, 0 //===---------------------------------------------------------------------===// -rdar://6037315 - -llvm-gcc-4.2 does the following for uint32_t -> float conversions on i386: - - uint32_t x; - float y = (float)x; - -becomes: - -movl %eax, -8(%ebp) // write x to the stack -movl $0x43300000, -4(%ebp) // 2^52 + x as a double at -8(%ebp) -movsd -8(%ebp), %xmm0 -subsd [2^52 double], %xmm0 // subtract 2^52 -- this is exact -cvtsd2ss %xmm0, %xmm0 // convert to single -- rounding happens here - -On merom/yonah, this takes a substantial stall. The following is a much -better option: - -movd %eax, %xmm0 // load x into low word of xmm0 -movsd [2^52 double], %xmm1 // load 2^52 into xmm1 -orpd %xmm1, %xmm0 // 2^52 + x in double precision -subsd %xmm1, %xmm0 // x in double precision -cvtsd2ss %xmm0, %xmm0 // x rounded to single precision - -If we don't already need PIC, then the following is even faster still, at a -small cost to code size: - -movl $0x43300000, %ecx // conjure high word of 2^52 -movd %ecx, %xmm1 -movd %eax, %xmm0 // load x into low word of xmm0 -psllq $32, %xmm1 // 2^52 -orpd %xmm1, %xmm0 // 2^52 + x in double precision -subsd %xmm1, %xmm0 // x in double precision -cvtsd2ss %xmm0, %xmm0 // x in single precision - -//===---------------------------------------------------------------------===// rdar://5907648 This function: |