Improve comments in the unpcklps generating logic, and introduce a new EltStride variable instead of reusing the NumElems variable for a non-obvious purpose. No functionality change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112377 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2010-08-28 17:15:43 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-08-28 17:15:43 +0000
commit: 6e80e449261e259d41a247603228be7802c112d1 (patch)
tree: 2431832564f4b66944e40fddc85ee903829e71dc /lib/Target/X86/X86ISelLowering.cpp
parent: 44edb0bd0c56483d6a7c473704d70ab67611d371 (diff)
download: external_llvm-6e80e449261e259d41a247603228be7802c112d1.zip
external_llvm-6e80e449261e259d41a247603228be7802c112d1.tar.gz
external_llvm-6e80e449261e259d41a247603228be7802c112d1.tar.bz2
1 files changed, 18 insertions, 11 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a7ed4f..91fc26c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4040,8 +4040,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
-  // All zero's are handled with pxor in SSE2 and above, xorps in SSE1 and
-  // all one's are handled with pcmpeqd. In AVX, zero's are handled with
+  // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
+  // All one's are handled with pcmpeqd. In AVX, zero's are handled with
   // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
   // is present, so AllOnes is ignored.
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
@@ -4288,18 +4288,25 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       return V[0];
     }
     
-    // Otherwise, expand into a number of unpckl*
-    // e.g. for v4f32
+    // Otherwise, expand into a number of unpckl*, start by extending each of
+    // our (non-undef) elements to the full vector width with the element in the
+    // bottom slot of the vector (which generates no code for SSE).
+    for (unsigned i = 0; i < NumElems; ++i) {
+      if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+      else
+        V[i] = DAG.getUNDEF(VT);
+    }
+
+    // Next, we iteratively mix elements, e.g. for v4f32:
     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
-    for (unsigned i = 0; i < NumElems; ++i)
-      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
-    NumElems >>= 1;
-    while (NumElems != 0) {
-      for (unsigned i = 0; i < NumElems; ++i)
-        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
-      NumElems >>= 1;
+    unsigned EltStride = NumElems >> 1;
+    while (EltStride != 0) {
+      for (unsigned i = 0; i < EltStride; ++i)
+        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+      EltStride >>= 1;
     }
     return V[0];
   }
author	Chris Lattner <sabre@nondot.org>	2010-08-28 17:15:43 +0000
committer	Chris Lattner <sabre@nondot.org>	2010-08-28 17:15:43 +0000
commit	6e80e449261e259d41a247603228be7802c112d1 (patch)
tree	2431832564f4b66944e40fddc85ee903829e71dc /lib/Target/X86/X86ISelLowering.cpp
parent	44edb0bd0c56483d6a7c473704d70ab67611d371 (diff)
download	external_llvm-6e80e449261e259d41a247603228be7802c112d1.zip external_llvm-6e80e449261e259d41a247603228be7802c112d1.tar.gz external_llvm-6e80e449261e259d41a247603228be7802c112d1.tar.bz2