summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2008-02-21 06:51:29 +0000
committerChris Lattner <sabre@nondot.org>2008-02-21 06:51:29 +0000
commit7c1687c196e8dc89a4722e0463965f0b0f5f399c (patch)
tree22d8d533d42a2d63d5d0febf31d527862c843e80
parentab0b949e0e9de452f3b052b11634ab761e008b23 (diff)
downloadexternal_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.zip
external_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.tar.gz
external_llvm-7c1687c196e8dc89a4722e0463965f0b0f5f399c.tar.bz2
Dan implemented one multiply issue. Replace it with another. :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47431 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/README.txt45
1 files changed, 33 insertions, 12 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index f6a0b78..736e299 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1392,23 +1392,44 @@ _foo:
//===---------------------------------------------------------------------===//
-We're missing an obvious fold of a load into imul:
+We're codegen'ing multiply of long longs inefficiently:
-int test(long a, long b) { return a * b; }
+unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) {
+ return arg1 * arg2;
+}
-LLVM produces:
-_test:
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- imull %ecx, %eax
- ret
+We compile to (with -fomit-frame-pointer):
-vs:
-_test:
- movl 8(%esp), %eax
- imull 4(%esp), %eax
+_LLM:
+ pushl %esi
+ movl 8(%esp), %ecx
+ movl 16(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+ imull 12(%esp), %esi
+ addl %edx, %esi
+ imull 20(%esp), %ecx
+ movl %esi, %edx
+ addl %ecx, %edx
+ popl %esi
+ ret
+
+This looks like a scheduling deficiency and a lack of rematerialization of the
+loads from the argument area. ICC apparently produces:
+
+ movl 8(%esp), %ecx
+ imull 12(%esp), %ecx
+ movl 16(%esp), %eax
+ imull 4(%esp), %eax
+ addl %eax, %ecx
+ movl 4(%esp), %eax
+ mull 12(%esp)
+ addl %ecx, %edx
ret
+Note that it rematerialized the loads from 4(%esp) and 12(%esp). See this GCC PR:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236
+
//===---------------------------------------------------------------------===//
We can fold a store into "zeroing a reg". Instead of: