diff options
author | Bill Wendling <isanbard@gmail.com> | 2013-12-07 09:39:35 +0000 |
---|---|---|
committer | Bill Wendling <isanbard@gmail.com> | 2013-12-07 09:39:35 +0000 |
commit | 2990853ea8bf4888b179ac6c493836b83769e87b (patch) | |
tree | 1996e9d95c75d32bbdc981d2cfb151c3b82a5a55 /test/CodeGen | |
parent | 31928dfc03d92322f9f2fb1c4a7878024d3cc9d1 (diff) | |
download | external_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.zip external_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.tar.gz external_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.tar.bz2 |
Merging r196261:
------------------------------------------------------------------------
r196261 | hliao | 2013-12-03 01:17:32 -0800 (Tue, 03 Dec 2013) | 13 lines
Enhance the fix of PR17631
- The fix to PR17631 covers only some of the cases where 'vzeroupper' should
not be issued before a 'call' instruction. There are other cases where helper
calls are inserted, not limited to the epilog. These helper calls do
not follow the standard calling convention and won't clobber any YMM
registers. (So far, all calling conventions clobber all or part of the
YMM registers.)
This patch enhances the previous fix to cover more cases where 'vzeroupper' should
not be inserted, by checking whether the function call clobbers any YMM
registers and skipping the insertion if it does not.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196652 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/pr17631.ll | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll index a572ff2..98f951f 100644 --- a/test/CodeGen/X86/pr17631.ll +++ b/test/CodeGen/X86/pr17631.ll @@ -1,16 +1,16 @@ ; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s - + %struct_type = type { [64 x <8 x float>], <8 x float> } - + ; Function Attrs: nounwind readnone declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) - + ; Function Attrs: nounwind define i32 @equal(<8 x i32> %A) { allocas: %first_alloc = alloca [64 x <8 x i32>] %second_alloc = alloca %struct_type - + %A1 = bitcast <8 x i32> %A to <8 x float> %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) ret i32 %A2 @@ -20,3 +20,15 @@ allocas: ; CHECK-NOT: vzeroupper ; CHECK: _chkstk ; CHECK: ret + +define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { + %i = fptoui double %x to i64 + store i64 %i, i64* %p + %ret = fadd <8 x float> %y, %y + ret <8 x float> %ret +} + +; CHECK: foo +; CHECK-NOT: vzeroupper +; CHECK: _ftol2 +; CHECK: ret |