summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bill Wendling <isanbard@gmail.com> 2013-12-06 09:10:19 +0000
committer Bill Wendling <isanbard@gmail.com> 2013-12-06 09:10:19 +0000
commit 7f6926930f48234484167e9ecce90f627a030702 (patch)
tree 0ef4e3c016b45cfbcdc394a3bbf1391ed521f2f2
parent aee5c3e1052b2e144fdc6461bd602cdc502a93cc (diff)
download external_llvm-7f6926930f48234484167e9ecce90f627a030702.zip
external_llvm-7f6926930f48234484167e9ecce90f627a030702.tar.gz
external_llvm-7f6926930f48234484167e9ecce90f627a030702.tar.bz2
Merging r196508:
------------------------------------------------------------------------
r196508 | arnolds | 2013-12-05 07:14:40 -0800 (Thu, 05 Dec 2013) | 12 lines

SLPVectorizer: An in-tree vectorized entry cannot also be a scalar external use

We were creating external uses for scalar values in MustGather entries that also
had a ScalarToTreeEntry (they also are present in a vectorized tuple). This meant
we would keep a value 'alive' as a scalar and vectorized causing havoc.
This is not necessary because when we create a MustGather vector we explicitly
create external uses entries for the insertelement instructions of the
MustGather vector elements.

Fixes PR18129.

radar://15582184
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196571 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp 6
-rw-r--r-- test/Transforms/SLPVectorizer/X86/external_user.ll 35
2 files changed, 36 insertions, 5 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9f18596..c72b51f 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -564,10 +564,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");
- bool Gathered = MustGather.count(*User);
-
// Skip in-tree scalars that become vectors.
- if (ScalarToTreeEntry.count(*User) && !Gathered) {
+ if (ScalarToTreeEntry.count(*User)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
**User << ".\n");
int Idx = ScalarToTreeEntry[*User]; (void) Idx;
@@ -1638,8 +1636,6 @@ Value *BoUpSLP::vectorizeTree() {
for (Value::use_iterator User = Scalar->use_begin(),
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
- assert(!MustGather.count(*User) &&
- "Replacing gathered value with undef");
assert((ScalarToTreeEntry.count(*User) ||
// It is legal to replace the reduction users by undef.
diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll
index 22f0e64..6d09aa6 100644
--- a/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -59,3 +59,38 @@ for.end: ; preds = %for.body
ret double %mul3
}
+; A need-to-gather entry cannot be an external use of the scalar element.
+; Instead the insertelement instructions of the need-to-gather entry are the
+; external users.
+; This test would assert because we would keep the scalar fpext and fadd alive.
+; PR18129
+
+; CHECK-LABEL: needtogather
+define i32 @needtogather(double *noalias %a, i32 *noalias %b, float * noalias %c,
+ i32 * noalias %d) {
+entry:
+ %0 = load i32* %d, align 4
+ %conv = sitofp i32 %0 to float
+ %1 = load float* %c
+ %sub = fsub float 0.000000e+00, %1
+ %mul = fmul float %sub, 0.000000e+00
+ %add = fadd float %conv, %mul
+ %conv1 = fpext float %add to double
+ %sub3 = fsub float 1.000000e+00, %1
+ %mul4 = fmul float %sub3, 0.000000e+00
+ %add5 = fadd float %conv, %mul4
+ %conv6 = fpext float %add5 to double
+ %tobool = fcmp une float %add, 0.000000e+00
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ br label %if.end
+
+if.end:
+ %storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
+ %e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
+ store double %storemerge, double* %a, align 8
+ %conv7 = fptosi double %e.0 to i32
+ store i32 %conv7, i32* %b, align 4
+ ret i32 undef
+}