Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d388d88

Browse files
DOC fix xlabel in Tweedie regression on insurance claims (#30362)
Co-authored-by: Jérémie du Boisberranger <[email protected]>
1 parent 0d9fb78 commit d388d88

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

examples/linear_model/plot_tweedie_regression_insurance_claims.py

Lines changed: 21 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -613,11 +613,11 @@ def score_estimator(
613613

614614
# %%
615615
#
616-
# Finally, we can compare the two models using a plot of cumulated claims: for
616+
# Finally, we can compare the two models using a plot of cumulative claims: for
617617
# each model, the policyholders are ranked from safest to riskiest based on the
618-
# model predictions and the fraction of observed total cumulated claims is
619-
# plotted on the y axis. This plot is often called the ordered Lorenz curve of
620-
# the model.
618+
# model predictions and the cumulative proportion of claim amounts is plotted
619+
# against the cumulative proportion of exposure. This plot is often called
620+
# the ordered Lorenz curve of the model.
621621
#
622622
# The Gini coefficient (based on the area between the curve and the diagonal)
623623
# can be used as a model selection metric to quantify the ability of the model
@@ -627,7 +627,7 @@ def score_estimator(
627627
# Gini coefficient is upper bounded by 1.0 but even an oracle model that ranks
628628
# the policyholders by the observed claim amounts cannot reach a score of 1.0.
629629
#
630-
# We observe that both models are able to rank policyholders by risky-ness
630+
# We observe that both models are able to rank policyholders by riskiness
631631
# significantly better than chance although they are also both far from the
632632
# oracle model due to the natural difficulty of the prediction problem from a
633633
# few features: most accidents are not predictable and can be caused by
@@ -653,11 +653,11 @@ def lorenz_curve(y_true, y_pred, exposure):
653653
ranking = np.argsort(y_pred)
654654
ranked_exposure = exposure[ranking]
655655
ranked_pure_premium = y_true[ranking]
656-
cumulated_claim_amount = np.cumsum(ranked_pure_premium * ranked_exposure)
657-
cumulated_claim_amount /= cumulated_claim_amount[-1]
658-
cumulated_exposure = np.cumsum(ranked_exposure)
659-
cumulated_exposure /= cumulated_exposure[-1]
660-
return cumulated_exposure, cumulated_claim_amount
656+
cumulative_claim_amount = np.cumsum(ranked_pure_premium * ranked_exposure)
657+
cumulative_claim_amount /= cumulative_claim_amount[-1]
658+
cumulative_exposure = np.cumsum(ranked_exposure)
659+
cumulative_exposure /= cumulative_exposure[-1]
660+
return cumulative_exposure, cumulative_claim_amount
661661

662662

663663
fig, ax = plt.subplots(figsize=(8, 8))
@@ -669,27 +669,30 @@ def lorenz_curve(y_true, y_pred, exposure):
669669
("Frequency * Severity model", y_pred_product),
670670
("Compound Poisson Gamma", y_pred_total),
671671
]:
672-
ordered_samples, cum_claims = lorenz_curve(
672+
cum_exposure, cum_claims = lorenz_curve(
673673
df_test["PurePremium"], y_pred, df_test["Exposure"]
674674
)
675-
gini = 1 - 2 * auc(ordered_samples, cum_claims)
675+
gini = 1 - 2 * auc(cum_exposure, cum_claims)
676676
label += " (Gini index: {:.3f})".format(gini)
677-
ax.plot(ordered_samples, cum_claims, linestyle="-", label=label)
677+
ax.plot(cum_exposure, cum_claims, linestyle="-", label=label)
678678

679679
# Oracle model: y_pred == y_test
680-
ordered_samples, cum_claims = lorenz_curve(
680+
cum_exposure, cum_claims = lorenz_curve(
681681
df_test["PurePremium"], df_test["PurePremium"], df_test["Exposure"]
682682
)
683-
gini = 1 - 2 * auc(ordered_samples, cum_claims)
683+
gini = 1 - 2 * auc(cum_exposure, cum_claims)
684684
label = "Oracle (Gini index: {:.3f})".format(gini)
685-
ax.plot(ordered_samples, cum_claims, linestyle="-.", color="gray", label=label)
685+
ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label=label)
686686

687687
# Random baseline
688688
ax.plot([0, 1], [0, 1], linestyle="--", color="black", label="Random baseline")
689689
ax.set(
690690
title="Lorenz Curves",
691-
xlabel="Fraction of policyholders\n(ordered by model from safest to riskiest)",
692-
ylabel="Fraction of total claim amount",
691+
xlabel=(
692+
"Cumulative proportion of exposure\n"
693+
"(ordered by model from safest to riskiest)"
694+
),
695+
ylabel="Cumulative proportion of claim amounts",
693696
)
694697
ax.legend(loc="upper left")
695698
plt.plot()

0 commit comments

Comments
 (0)