DOC adding PDP for categorical features in highlights (#25065)

glemaitre · jeremiedbb · web-flow · commit 45019594938f · 2022-11-30T14:06:54.000-05:00
Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
@@ -411,7 +411,7 @@ Changelog
 :mod:`sklearn.inspection`
 .........................
 
-- |Enhancement| Extended :func:`inspection.partial_dependence` and
+- |MajorFeature| Extended :func:`inspection.partial_dependence` and
   :class:`inspection.PartialDependenceDisplay` to handle categorical features.
   :pr:`18298` by :user:`Madhura Jayaratne <madhuracj>` and
   :user:`Guillaume Lemaitre <glemaitre>`.
diff --git a/examples/release_highlights/plot_release_highlights_1_2_0.py b/examples/release_highlights/plot_release_highlights_1_2_0.py
@@ -93,15 +93,49 @@
     hist_no_interact, X, y, cv=5, n_jobs=2, train_sizes=np.linspace(0.1, 1, 5)
 )
 
+# %%
+# :class:`~inspection.PartialDependenceDisplay` exposes a new parameter
+# `categorical_features` to display partial dependence for categorical features
+# using bar plots and heatmaps.
+from sklearn.datasets import fetch_openml
+
+X, y = fetch_openml(
+    "titanic", version=1, as_frame=True, return_X_y=True, parser="pandas"
+)
+X = X.select_dtypes(["number", "category"]).drop(columns=["body"])
+
+# %%
+from sklearn.preprocessing import OrdinalEncoder
+from sklearn.pipeline import make_pipeline
+
+categorical_features = ["pclass", "sex", "embarked"]
+model = make_pipeline(
+    ColumnTransformer(
+        transformers=[("cat", OrdinalEncoder(), categorical_features)],
+        remainder="passthrough",
+    ),
+    HistGradientBoostingRegressor(random_state=0),
+).fit(X, y)
+
+# %%
+from sklearn.inspection import PartialDependenceDisplay
+
+fig, ax = plt.subplots(figsize=(14, 4), constrained_layout=True)
+_ = PartialDependenceDisplay.from_estimator(
+    model,
+    X,
+    features=["age", "sex", ("pclass", "sex")],
+    categorical_features=categorical_features,
+    ax=ax,
+)
+
 # %%
 # Faster parser in :func:`~datasets.fetch_openml`
 # -----------------------------------------------
 # :func:`~datasets.fetch_openml` now supports a new `"pandas"` parser that is
 # more memory and CPU efficient. In v1.4, the default will change to
 # `parser="auto"` which will automatically use the `"pandas"` parser for dense
 # data and `"liac-arff"` for sparse data.
-from sklearn.datasets import fetch_openml
-
 X, y = fetch_openml(
     "titanic", version=1, as_frame=True, return_X_y=True, parser="pandas"
 )