Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6f6fca1

Browse files
committed
[VPlan] Re-use common cast cost logic for VPReplicateRecipe (NFCI).
Move the logic to compute cast costs to getCostForRecipeWithOpcode and use it for VPReplicateRecipe. This should match the costs computed by the legacy cost model for scalar casts.
1 parent 746eced commit 6f6fca1

1 file changed

Lines changed: 94 additions & 59 deletions

File tree

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 94 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -978,6 +978,81 @@ InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
978978
Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
979979
{TTI::OK_AnyValue, TTI::OP_None}, CtxI);
980980
}
981+
case Instruction::BitCast:
982+
return 0;
983+
case Instruction::SExt:
984+
case Instruction::ZExt:
985+
case Instruction::FPToUI:
986+
case Instruction::FPToSI:
987+
case Instruction::FPExt:
988+
case Instruction::PtrToInt:
989+
case Instruction::IntToPtr:
990+
case Instruction::SIToFP:
991+
case Instruction::UIToFP:
992+
case Instruction::Trunc:
993+
case Instruction::FPTrunc: {
994+
// Computes the CastContextHint from a recipe that may access memory.
995+
auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
996+
if (isa<VPInterleaveBase>(R))
997+
return TTI::CastContextHint::Interleave;
998+
if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R)) {
999+
// Only compute CCH for memory operations, matching the legacy model
1000+
// which only considers loads/stores for cast context hints.
1001+
auto *UI = cast<Instruction>(ReplicateRecipe->getUnderlyingValue());
1002+
if (!isa<LoadInst, StoreInst>(UI))
1003+
return TTI::CastContextHint::None;
1004+
return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1005+
: TTI::CastContextHint::Normal;
1006+
}
1007+
const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1008+
if (WidenMemoryRecipe == nullptr)
1009+
return TTI::CastContextHint::None;
1010+
if (VF.isScalar())
1011+
return TTI::CastContextHint::Normal;
1012+
if (!WidenMemoryRecipe->isConsecutive())
1013+
return TTI::CastContextHint::GatherScatter;
1014+
if (WidenMemoryRecipe->isReverse())
1015+
return TTI::CastContextHint::Reversed;
1016+
if (WidenMemoryRecipe->isMasked())
1017+
return TTI::CastContextHint::Masked;
1018+
return TTI::CastContextHint::Normal;
1019+
};
1020+
1021+
VPValue *Operand = getOperand(0);
1022+
TTI::CastContextHint CCH = TTI::CastContextHint::None;
1023+
// For Trunc/FPTrunc, get the context from the only user.
1024+
if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
1025+
auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
1026+
if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser())
1027+
return nullptr;
1028+
return dyn_cast<VPRecipeBase>(*R->user_begin());
1029+
};
1030+
if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
1031+
if (match(Recipe, m_Reverse(m_VPValue())))
1032+
Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
1033+
if (Recipe)
1034+
CCH = ComputeCCH(Recipe);
1035+
}
1036+
}
1037+
// For Z/Sext, get the context from the operand.
1038+
else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1039+
Opcode == Instruction::FPExt) {
1040+
if (auto *Recipe = Operand->getDefiningRecipe()) {
1041+
VPValue *ReverseOp;
1042+
if (match(Recipe, m_Reverse(m_VPValue(ReverseOp))))
1043+
Recipe = ReverseOp->getDefiningRecipe();
1044+
if (Recipe)
1045+
CCH = ComputeCCH(Recipe);
1046+
}
1047+
}
1048+
1049+
auto *ScalarSrcTy = Ctx.Types.inferScalarType(Operand);
1050+
Type *SrcTy = VF.isVector() ? toVectorTy(ScalarSrcTy, VF) : ScalarSrcTy;
1051+
// Arm TTI will use the underlying instruction to determine the cost.
1052+
return Ctx.TTI.getCastInstrCost(
1053+
Opcode, ResultTy, SrcTy, CCH, Ctx.CostKind,
1054+
dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1055+
}
9811056
}
9821057
llvm_unreachable("called for unsupported opcode");
9831058
}
@@ -2249,65 +2324,7 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
22492324
// reduction in a smaller type.
22502325
if (!getUnderlyingValue())
22512326
return 0;
2252-
// Computes the CastContextHint from a recipes that may access memory.
2253-
auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
2254-
if (VF.isScalar())
2255-
return TTI::CastContextHint::Normal;
2256-
if (isa<VPInterleaveBase>(R))
2257-
return TTI::CastContextHint::Interleave;
2258-
if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
2259-
return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
2260-
: TTI::CastContextHint::Normal;
2261-
const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
2262-
if (WidenMemoryRecipe == nullptr)
2263-
return TTI::CastContextHint::None;
2264-
if (!WidenMemoryRecipe->isConsecutive())
2265-
return TTI::CastContextHint::GatherScatter;
2266-
if (WidenMemoryRecipe->isReverse())
2267-
return TTI::CastContextHint::Reversed;
2268-
if (WidenMemoryRecipe->isMasked())
2269-
return TTI::CastContextHint::Masked;
2270-
return TTI::CastContextHint::Normal;
2271-
};
2272-
2273-
VPValue *Operand = getOperand(0);
2274-
TTI::CastContextHint CCH = TTI::CastContextHint::None;
2275-
// For Trunc/FPTrunc, get the context from the only user.
2276-
if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
2277-
auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
2278-
if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser())
2279-
return nullptr;
2280-
return dyn_cast<VPRecipeBase>(*R->user_begin());
2281-
};
2282-
2283-
if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
2284-
if (match(Recipe, m_Reverse(m_VPValue())))
2285-
Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
2286-
if (Recipe)
2287-
CCH = ComputeCCH(Recipe);
2288-
}
2289-
}
2290-
// For Z/Sext, get the context from the operand.
2291-
else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
2292-
Opcode == Instruction::FPExt) {
2293-
if (Operand->isLiveIn())
2294-
CCH = TTI::CastContextHint::Normal;
2295-
else if (auto *Recipe = Operand->getDefiningRecipe()) {
2296-
VPValue *ReverseOp;
2297-
if (match(Recipe, m_Reverse(m_VPValue(ReverseOp))))
2298-
Recipe = ReverseOp->getDefiningRecipe();
2299-
if (Recipe)
2300-
CCH = ComputeCCH(Recipe);
2301-
}
2302-
}
2303-
2304-
auto *SrcTy =
2305-
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
2306-
auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
2307-
// Arm TTI will use the underlying instruction to determine the cost.
2308-
return Ctx.TTI.getCastInstrCost(
2309-
Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
2310-
dyn_cast_if_present<Instruction>(getUnderlyingValue()));
2327+
return getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
23112328
}
23122329

23132330
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3356,6 +3373,24 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
33563373
return (ScalarCost * VF.getFixedValue()) +
33573374
Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
33583375
}
3376+
case Instruction::SExt:
3377+
case Instruction::ZExt:
3378+
case Instruction::FPToUI:
3379+
case Instruction::FPToSI:
3380+
case Instruction::FPExt:
3381+
case Instruction::PtrToInt:
3382+
case Instruction::IntToPtr:
3383+
case Instruction::SIToFP:
3384+
case Instruction::UIToFP:
3385+
case Instruction::Trunc:
3386+
case Instruction::FPTrunc: {
3387+
return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
3388+
Ctx) *
3389+
(isSingleScalar() ? 1 : VF.getFixedValue());
3390+
}
3391+
case Instruction::ExtractValue:
3392+
case Instruction::InsertValue:
3393+
return Ctx.TTI.getInsertExtractValueCost(getOpcode(), Ctx.CostKind);
33593394
}
33603395

33613396
return Ctx.getLegacyCost(UI, VF);

0 commit comments

Comments
 (0)