@@ -3063,17 +3063,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3063
3063
case Instruction::Call: {
3064
3064
auto *CalledFn =
3065
3065
cast<Function>(getOperand (getNumOperands () - 1 )->getLiveInIRValue ());
3066
- if (CalledFn->isIntrinsic ())
3067
- break ;
3068
3066
3067
+ SmallVector<const VPValue *> ArgOps (drop_end (operands ()));
3069
3068
SmallVector<Type *, 4 > Tys;
3070
- for (VPValue *ArgOp : drop_end ( operands ()) )
3069
+ for (const VPValue *ArgOp : ArgOps )
3071
3070
Tys.push_back (Ctx.Types .inferScalarType (ArgOp));
3071
+
3072
+ if (CalledFn->isIntrinsic ())
3073
+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
3074
+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3075
+ switch (CalledFn->getIntrinsicID ()) {
3076
+ case Intrinsic::assume:
3077
+ case Intrinsic::lifetime_end:
3078
+ case Intrinsic::lifetime_start:
3079
+ case Intrinsic::sideeffect:
3080
+ case Intrinsic::pseudoprobe:
3081
+ case Intrinsic::experimental_noalias_scope_decl: {
3082
+ assert (getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3083
+ ElementCount::getFixed (1 ), Ctx) == 0 &&
3084
+ " scalarizing intrinsic should be free" );
3085
+ return InstructionCost (0 );
3086
+ }
3087
+ default :
3088
+ break ;
3089
+ }
3090
+
3072
3091
Type *ResultTy = Ctx.Types .inferScalarType (this );
3073
3092
InstructionCost ScalarCallCost =
3074
3093
Ctx.TTI .getCallInstrCost (CalledFn, ResultTy, Tys, Ctx.CostKind );
3075
- if (isSingleScalar ())
3094
+ if (isSingleScalar ()) {
3095
+ if (CalledFn->isIntrinsic ())
3096
+ ScalarCallCost = std::min (
3097
+ ScalarCallCost,
3098
+ getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3099
+ ElementCount::getFixed (1 ), Ctx));
3076
3100
return ScalarCallCost;
3101
+ }
3077
3102
3078
3103
if (VF.isScalable ())
3079
3104
return InstructionCost::getInvalid ();
@@ -3094,7 +3119,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3094
3119
// incur any overhead.
3095
3120
SmallPtrSet<const VPValue *, 4 > UniqueOperands;
3096
3121
Tys.clear ();
3097
- for (auto *Op : drop_end ( operands ()) ) {
3122
+ for (auto *Op : ArgOps ) {
3098
3123
if (Op->isLiveIn () || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
3099
3124
!UniqueOperands.insert (Op).second )
3100
3125
continue ;
@@ -3104,8 +3129,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3104
3129
Ctx.TTI .getOperandsScalarizationOverhead (Tys, Ctx.CostKind );
3105
3130
}
3106
3131
3107
- return ScalarCallCost * (isSingleScalar () ? 1 : VF.getFixedValue ()) +
3108
- ScalarizationCost;
3132
+ return ScalarCallCost * VF.getFixedValue () + ScalarizationCost;
3109
3133
}
3110
3134
case Instruction::Add:
3111
3135
case Instruction::Sub:
0 commit comments