summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lib/Transforms/InstCombine/InstCombineVectorOps.cpp 4
-rw-r--r-- test/Transforms/InstCombine/vec_shuffle.ll 27
2 files changed, 31 insertions, 0 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1..bbfad86 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -336,6 +336,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Update Mask to reflect that `ScalarOp' has been inserted at
+ // position `InsertedIdx' within the vector returned by IEI.
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 14f5321..37d4d56 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -196,3 +196,30 @@ define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
<4 x i16> %lhs, <4 x i16> %rhs
ret <4 x i16> %A
}
+
+; Check that sequences of insertelement/extractelement instructions are
+; collapsed into a shufflevector instruction with the correct shuffle indices.
+
+define <4 x float> @test14a(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK: @test14a
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp4
+ %tmp1 = extractelement <4 x float> %LHS, i32 0
+ %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1
+ %tmp3 = extractelement <4 x float> %RHS, i32 2
+ %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3
+ ret <4 x float> %tmp4
+}
+
+define <4 x float> @test14b(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK: @test14b
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp5
+ %tmp0 = extractelement <4 x float> %LHS, i32 3
+ %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0
+ %tmp2 = extractelement <4 x float> %tmp1, i32 0
+ %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1
+ %tmp4 = extractelement <4 x float> %RHS, i32 2
+ %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3
+ ret <4 x float> %tmp5
+}