diff options
Diffstat (limited to 'test/CodeGen/X86/vec_splat.ll')
-rw-r--r-- | test/CodeGen/X86/vec_splat.ll | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 543c96e..a02e383 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -32,3 +32,19 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
 ; SSE3-LABEL: test_v2sd:
 ; SSE3: movddup
 }
+
+; Fold a variable-index extract of a loaded vector into the load's address computation. This avoids spilling the vector to the stack: the AVX checks expect vbroadcastss with no preceding 'movs' match.
+define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
+  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
+  %2 = load <4 x float>* %1, align 16
+  %3 = extractelement <4 x float> %2, i64 %j
+  %4 = insertelement <4 x float> undef, float %3, i32 0
+  %5 = insertelement <4 x float> %4, float %3, i32 1
+  %6 = insertelement <4 x float> %5, float %3, i32 2
+  %7 = insertelement <4 x float> %6, float %3, i32 3
+  ret <4 x float> %7
+
+; AVX-LABEL: load_extract_splat
+; AVX-NOT: movs
+; AVX: vbroadcastss
+}
|