 lib/CodeGen/SelectionDAG/DAGCombiner.cpp   | 20
 test/CodeGen/PowerPC/load-shift-combine.ll | 34
 2 files changed, 49 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 359c4cf..a82410a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5100,16 +5100,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   // If we haven't found a load, we can't narrow it.  Don't transform one with
   // multiple uses, this would require adding a new load.
-  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
-      // Don't change the width of a volatile load.
-      cast<LoadSDNode>(N0)->isVolatile())
+  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+    return SDValue();
+
+  // Don't change the width of a volatile load.
+  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  if (LN0->isVolatile())
     return SDValue();
 
   // Verify that we are actually reducing a load width here.
-  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+    return SDValue();
+
+  // For the transform to be legal, the load must produce only two values
+  // (the value loaded and the chain).  Don't transform a pre-increment
+  // load, for example, which produces an extra value.  Otherwise the
+  // transformation is not equivalent, and the downstream logic to replace
+  // uses gets things wrong.
+  if (LN0->getNumValues() > 2)
     return SDValue();
 
-  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   EVT PtrType = N0.getOperand(1).getValueType();
   if (PtrType == MVT::Untyped || PtrType.isExtended())
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
new file mode 100644
index 0000000..a5d1224
--- /dev/null
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s
+
+; This used to cause a crash. A standard load is converted to a pre-increment
+; load. Later the pre-increment load is combined with a subsequent SRL to
+; produce a smaller load. This transform invalidly created a standard load
+; and propagated the produced value into uses of both produced values of the
+; pre-increment load. The result was a crash when attempting to process an
+; add with a token-chain operand.
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* }
+%struct.anon.76 = type { i32 }
+@info = common global %struct.Info zeroinitializer, align 8
+@fails = common global i32 0, align 4
+@a1847 = external global [5 x %struct.S1847]
+define void @test1847() nounwind {
+entry:
+  %j = alloca i32, align 4
+  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  %1 = load i32* @fails, align 4
+  %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %bf.clear2 = and i96 %bf.load1, 302231454903657293676543
+  %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
+  store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %2 = load i32* %j, align 4
+  %3 = load i32* %j, align 4
+  %inc11 = add nsw i32 %3, 1
+  store i32 %inc11, i32* %j, align 4
+  %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %bf.clear16 = and i96 %bf.load15, -18446744069414584321
+  %bf.set17 = or i96 %bf.clear16, 18446743532543672320
+  store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  ret void
+}
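
Illustrative note (not part of the commit): the new guard works because an ordinary load node has exactly two results, the loaded value and the chain, whereas a pre- or post-increment load also yields the updated base pointer as a third result. The standalone C++ sketch below models that invariant with a hypothetical FakeLoadNode type; it deliberately uses no real LLVM API and only mirrors the shape of the getNumValues() > 2 bail-out.

// Standalone sketch, for illustration only -- FakeLoadNode is hypothetical
// and none of these types come from LLVM.
#include <cstddef>
#include <iostream>

struct FakeLoadNode {
  bool Indexed;                       // pre/post-increment addressing?
  std::size_t getNumValues() const {  // loaded value + chain (+ updated pointer)
    return Indexed ? 3 : 2;
  }
};

// Mirrors the bail-out added to ReduceLoadWidth: only a load with exactly two
// results (value and chain) may be rewritten as a plain narrower load.
bool mayReduceLoadWidth(const FakeLoadNode &N) {
  return N.getNumValues() <= 2;
}

int main() {
  FakeLoadNode Plain{false}, PreInc{true};
  std::cout << "plain load:         " << mayReduceLoadWidth(Plain) << '\n';   // 1
  std::cout << "pre-increment load: " << mayReduceLoadWidth(PreInc) << '\n';  // 0
  return 0;
}

On a pre-increment load the helper refuses the rewrite, which is exactly the case that previously left a use of the updated-pointer result wired to the wrong value.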