summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-07-05 05:48:41 +0000
committerChris Lattner <sabre@nondot.org>2010-07-05 05:48:41 +0000
commitc06cbad14134bcb5dc9aadb8f94a385de750b0d0 (patch)
tree854df9ff30f26cde0f50df45a0f57b5d3adf7687
parent598751ed2544291ba623e013b4e0b61bf56ca9c4 (diff)
downloadexternal_llvm-c06cbad14134bcb5dc9aadb8f94a385de750b0d0.zip
external_llvm-c06cbad14134bcb5dc9aadb8f94a385de750b0d0.tar.gz
external_llvm-c06cbad14134bcb5dc9aadb8f94a385de750b0d0.tar.bz2
some notes about suboptimal insertps's
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107613 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/README-SSE.txt31
1 files changed, 31 insertions, 0 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index d761bde..2a8506f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -846,3 +846,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and
doing a shuffle from v[1] to v[0] then a float store.
//===---------------------------------------------------------------------===//
+
+On SSE4 machines, we compile this code:
+
+define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
+ <2 x float> *%P) nounwind {
+ %Z = fadd <2 x float> %Q, %R
+
+ store <2 x float> %Z, <2 x float> *%P
+ ret <2 x float> %Z
+}
+
+into:
+
+_test2: ## @test2
+## BB#0:
+ insertps $0, %xmm2, %xmm2
+ insertps $16, %xmm3, %xmm2
+ insertps $0, %xmm0, %xmm3
+ insertps $16, %xmm1, %xmm3
+ addps %xmm2, %xmm3
+ movq %xmm3, (%rdi)
+ movaps %xmm3, %xmm0
+ pshufd $1, %xmm3, %xmm1
+ ## kill: XMM1<def> XMM1<kill>
+ ret
+
+The insertps's of $0 are pointless complex copies.
+
+//===---------------------------------------------------------------------===//
+
+