summaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-08-25 22:49:25 +0000
committerChris Lattner <sabre@nondot.org>2010-08-25 22:49:25 +0000
commite6f7c267df11a44679c35dec79787fbc276839fb (patch)
tree831f622cb82ef17861f20320751316a0edf4fe9f /lib/CodeGen
parente6e0018d3e6b5a7dae8b8ecf7ca3f31476e07800 (diff)
downloadexternal_llvm-e6f7c267df11a44679c35dec79787fbc276839fb.zip
external_llvm-e6f7c267df11a44679c35dec79787fbc276839fb.tar.gz
external_llvm-e6f7c267df11a44679c35dec79787fbc276839fb.tar.bz2
Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp61
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp82
2 files changed, 97 insertions, 46 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a8a8ecb..35e7520 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
if (PartVT == ValueVT)
return Val;
- if (PartVT.isVector()) // Vector/Vector bitcast.
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ }
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
@@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (NumParts == 1) {
- if (PartVT != ValueVT) {
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
- } else {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- PartVT, Val, DAG.getIntPtrConstant(0));
- }
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
}
Parts[0] = Val;
@@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
else
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- IntermediateVT, Val,
- DAG.getIntPtrConstant(i));
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
// Split the intermediate operands into legal parts.
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9772d55..077fd1d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const {
return std::make_pair(BestRC, 1);
}
+
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeSynthesizable(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() {
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
- }
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
}
}
@@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
- // Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
@@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
- if (DestVT.bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
- return 1;
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return