diff options
author | Chris Lattner <sabre@nondot.org> | 2008-02-21 06:51:29 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2008-02-21 06:51:29 +0000 |
commit | 7c1687c196e8dc89a4722e0463965f0b0f5f399c (patch) | |
tree | 22d8d533d42a2d63d5d0febf31d527862c843e80 | |
parent | ab0b949e0e9de452f3b052b11634ab761e008b23 (diff) | |
download | external_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.zip external_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.tar.gz external_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.tar.bz2 |
Dan implemented one multiply issue. Replace it with another. :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47431 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/README.txt | 45 |
1 files changed, 33 insertions, 12 deletions
```diff
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index f6a0b78..736e299 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1392,23 +1392,44 @@ _foo:
 
 //===---------------------------------------------------------------------===//
 
-We're missing an obvious fold of a load into imul:
+We're codegen'ing multiply of long longs inefficiently:
 
-int test(long a, long b) { return a * b; }
+unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) {
+  return arg1 * arg2;
+}
 
-LLVM produces:
-_test:
-        movl 4(%esp), %ecx
-        movl 8(%esp), %eax
-        imull %ecx, %eax
-        ret
+We compile to (fomit-frame-pointer):
 
-vs:
-_test:
-        movl 8(%esp), %eax
-        imull 4(%esp), %eax
+_LLM:
+        pushl %esi
+        movl 8(%esp), %ecx
+        movl 16(%esp), %esi
+        movl %esi, %eax
+        mull %ecx
+        imull 12(%esp), %esi
+        addl %edx, %esi
+        imull 20(%esp), %ecx
+        movl %esi, %edx
+        addl %ecx, %edx
+        popl %esi
+        ret
+
+This looks like a scheduling deficiency and lack of remat of the load from
+the argument area. ICC apparently produces:
+
+        movl 8(%esp), %ecx
+        imull 12(%esp), %ecx
+        movl 16(%esp), %eax
+        imull 4(%esp), %eax
+        addl %eax, %ecx
+        movl 4(%esp), %eax
+        mull 12(%esp)
+        addl %ecx, %edx
         ret
 
+Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236
+
 //===---------------------------------------------------------------------===//
 
 We can fold a store into "zeroing a reg". Instead of:
```