diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 1338606e3a096..688c42fd1748d 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -411,6 +411,11 @@ Changelog :func:`~sklearn.inspection.permutation_importance`. :pr:`19411` by :user:`Simona Maggio `. +- |Enhancement| Add kwargs to format ICE and PD lines separately in partial + dependence plots :func:`~sklearn.inspection.plot_partial_dependence` and + :meth:`~sklearn.inspection.PartialDependenceDisplay.plot`. + :pr:`19428` by :user:`Mehdi Hamoumi `. + :mod:`sklearn.linear_model` ........................... diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index ac8d20ec9f155..ceccd8c3001c1 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -53,9 +53,7 @@ y -= y.mean() -X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.1, random_state=0 -) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0) # %% # 1-way partial dependence with different models @@ -80,10 +78,12 @@ print("Training MLPRegressor...") tic = time() -est = make_pipeline(QuantileTransformer(), - MLPRegressor(hidden_layer_sizes=(50, 50), - learning_rate_init=0.01, - early_stopping=True)) +est = make_pipeline( + QuantileTransformer(), + MLPRegressor( + hidden_layer_sizes=(50, 50), learning_rate_init=0.01, early_stopping=True + ), +) est.fit(X_train, y_train) print(f"done in {time() - tic:.3f}s") print(f"Test R2 score: {est.score(X_test, y_test):.2f}") @@ -113,17 +113,25 @@ from sklearn.inspection import partial_dependence from sklearn.inspection import plot_partial_dependence -print('Computing partial dependence plots...') +print("Computing partial dependence plots...") tic = time() -features = ['MedInc', 'AveOccup', 'HouseAge', 'AveRooms'] +features = ["MedInc", "AveOccup", "HouseAge", "AveRooms"] display = plot_partial_dependence( - est, 
X_train, features, kind="both", subsample=50, - n_jobs=3, grid_resolution=20, random_state=0 + est, + X_train, + features, + kind="both", + subsample=50, + n_jobs=3, + grid_resolution=20, + random_state=0, + ice_lines_kw={"color": "tab:blue", "alpha": 0.2, "linewidth": 0.5}, + pd_line_kw={"color": "tab:orange", "linestyle": "--"}, ) print(f"done in {time() - tic:.3f}s") display.figure_.suptitle( - 'Partial dependence of house value on non-location features\n' - 'for the California housing dataset, with MLPRegressor' + "Partial dependence of house value on non-location features\n" + "for the California housing dataset, with MLPRegressor" ) display.figure_.subplots_adjust(hspace=0.3) @@ -156,16 +164,24 @@ # We will plot the partial dependence, both individual (ICE) and averaged one # (PDP). We limit to only 50 ICE curves to not overcrowd the plot. -print('Computing partial dependence plots...') +print("Computing partial dependence plots...") tic = time() display = plot_partial_dependence( - est, X_train, features, kind="both", subsample=50, - n_jobs=3, grid_resolution=20, random_state=0 + est, + X_train, + features, + kind="both", + subsample=50, + n_jobs=3, + grid_resolution=20, + random_state=0, + ice_lines_kw={"color": "tab:blue", "alpha": 0.2, "linewidth": 0.5}, + pd_line_kw={"color": "tab:orange", "linestyle": "--"}, ) print(f"done in {time() - tic:.3f}s") display.figure_.suptitle( - 'Partial dependence of house value on non-location features\n' - 'for the California housing dataset, with Gradient Boosting' + "Partial dependence of house value on non-location features\n" + "for the California housing dataset, with Gradient Boosting" ) display.figure_.subplots_adjust(wspace=0.4, hspace=0.3) @@ -209,18 +225,23 @@ # the tree-based algorithm, when only PDPs are requested, they can be computed # on an efficient way using the `'recursion'` method. 
-features = ['AveOccup', 'HouseAge', ('AveOccup', 'HouseAge')] -print('Computing partial dependence plots...') +features = ["AveOccup", "HouseAge", ("AveOccup", "HouseAge")] +print("Computing partial dependence plots...") tic = time() _, ax = plt.subplots(ncols=3, figsize=(9, 4)) display = plot_partial_dependence( - est, X_train, features, kind='average', n_jobs=3, grid_resolution=20, + est, + X_train, + features, + kind="average", + n_jobs=3, + grid_resolution=20, ax=ax, ) print(f"done in {time() - tic:.3f}s") display.figure_.suptitle( - 'Partial dependence of house value on non-location features\n' - 'for the California housing dataset, with Gradient Boosting' + "Partial dependence of house value on non-location features\n" + "for the California housing dataset, with Gradient Boosting" ) display.figure_.subplots_adjust(wspace=0.4, hspace=0.3) @@ -240,24 +261,27 @@ import numpy as np from mpl_toolkits.mplot3d import Axes3D + fig = plt.figure() -features = ('AveOccup', 'HouseAge') +features = ("AveOccup", "HouseAge") pdp = partial_dependence( - est, X_train, features=features, kind='average', grid_resolution=20 + est, X_train, features=features, kind="average", grid_resolution=20 ) XX, YY = np.meshgrid(pdp["values"][0], pdp["values"][1]) Z = pdp.average[0].T ax = Axes3D(fig) -surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, - cmap=plt.cm.BuPu, edgecolor='k') +fig.add_axes(ax) +surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu, edgecolor="k") ax.set_xlabel(features[0]) ax.set_ylabel(features[1]) -ax.set_zlabel('Partial dependence') +ax.set_zlabel("Partial dependence") # pretty init view ax.view_init(elev=22, azim=122) plt.colorbar(surf) -plt.suptitle('Partial dependence of house value on median\n' - 'age and average occupancy, with Gradient Boosting') +plt.suptitle( + "Partial dependence of house value on median\n" + "age and average occupancy, with Gradient Boosting" +) plt.subplots_adjust(top=0.9) plt.show() diff --git 
a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index b45bdbe0b2fb1..f03669e7a4207 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -32,6 +32,8 @@ def plot_partial_dependence( n_jobs=None, verbose=0, line_kw=None, + ice_lines_kw=None, + pd_line_kw=None, contour_kw=None, ax=None, kind="average", @@ -185,7 +187,24 @@ def plot_partial_dependence( line_kw : dict, default=None Dict with keywords passed to the ``matplotlib.pyplot.plot`` call. - For one-way partial dependence plots. + For one-way partial dependence plots. It can be used to define common + properties for both `ice_lines_kw` and `pd_line_kw`. + + ice_lines_kw : dict, default=None + Dictionary with keywords passed to the `matplotlib.pyplot.plot` call. + For ICE lines in the one-way partial dependence plots. + The key value pairs defined in `ice_lines_kw` take priority over + `line_kw`. + + .. versionadded:: 1.0 + + pd_line_kw : dict, default=None + Dictionary with keywords passed to the `matplotlib.pyplot.plot` call. + For partial dependence in one-way partial dependence plots. + The key value pairs defined in `pd_line_kw` take priority over + `line_kw`. + + .. versionadded:: 1.0 contour_kw : dict, default=None Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call. @@ -413,7 +432,14 @@ def convert_feature(fx): subsample=subsample, random_state=random_state, ) - return display.plot(ax=ax, n_cols=n_cols, line_kw=line_kw, contour_kw=contour_kw) + return display.plot( + ax=ax, + n_cols=n_cols, + line_kw=line_kw, + ice_lines_kw=ice_lines_kw, + pd_line_kw=pd_line_kw, + contour_kw=contour_kw, + ) class PartialDependenceDisplay: @@ -675,8 +701,8 @@ def _plot_one_way_partial_dependence( n_cols, pd_plot_idx, n_lines, - individual_line_kw, - line_kw, + ice_lines_kw, + pd_line_kw, ): """Plot 1-way partial dependence: ICE and PDP. 
@@ -704,9 +730,9 @@ def _plot_one_way_partial_dependence( matching 2D position in the grid layout. n_lines : int The total number of lines expected to be plot on the axis. - individual_line_kw : dict + ice_lines_kw : dict Dict with keywords passed when plotting the ICE lines. - line_kw : dict + pd_line_kw : dict Dict with keywords passed when plotting the PD plot. """ from matplotlib import transforms # noqa @@ -719,7 +745,7 @@ def _plot_one_way_partial_dependence( ax, pd_plot_idx, n_lines, - individual_line_kw, + ice_lines_kw, ) if self.kind in ("average", "both"): @@ -733,7 +759,7 @@ def _plot_one_way_partial_dependence( feature_values, ax, pd_line_idx, - line_kw, + pd_line_kw, ) trans = transforms.blended_transform_factory(ax.transData, ax.transAxes) @@ -759,7 +785,7 @@ def _plot_one_way_partial_dependence( else: ax.set_yticklabels([]) - if line_kw.get("label", None) and self.kind != "individual": + if pd_line_kw.get("label", None) and self.kind != "individual": ax.legend() def _plot_two_way_partial_dependence( @@ -842,7 +868,16 @@ def _plot_two_way_partial_dependence( ax.set_ylabel(self.feature_names[feature_idx[1]]) @_deprecate_positional_args(version="1.1") - def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None): + def plot( + self, + *, + ax=None, + n_cols=3, + line_kw=None, + ice_lines_kw=None, + pd_line_kw=None, + contour_kw=None, + ): """Plot partial dependence plots. Parameters @@ -865,6 +900,22 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None): Dict with keywords passed to the `matplotlib.pyplot.plot` call. For one-way partial dependence plots. + ice_lines_kw : dict, default=None + Dictionary with keywords passed to the `matplotlib.pyplot.plot` call. + For ICE lines in the one-way partial dependence plots. + The key value pairs defined in `ice_lines_kw` take priority over + `line_kw`. + + .. versionadded:: 1.0 + + pd_line_kw : dict, default=None + Dictionary with keywords passed to the `matplotlib.pyplot.plot` call. 
+ For partial dependence in one-way partial dependence plots. + The key value pairs defined in `pd_line_kw` take priority over + `line_kw`. + + .. versionadded:: 1.0 + contour_kw : dict, default=None Dict with keywords passed to the `matplotlib.pyplot.contourf` call for two-way partial dependence plots. @@ -880,6 +931,10 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None): if line_kw is None: line_kw = {} + if ice_lines_kw is None: + ice_lines_kw = {} + if pd_line_kw is None: + pd_line_kw = {} if contour_kw is None: contour_kw = {} @@ -893,14 +948,20 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None): "color": "C0", "label": "average" if self.kind == "both" else None, } - line_kw = {**default_line_kws, **line_kw} + if self.kind in ("individual", "both"): + default_ice_lines_kws = {"alpha": 0.3, "linewidth": 0.5} + else: + default_ice_lines_kws = {} - individual_line_kw = line_kw.copy() - del individual_line_kw["label"] + ice_lines_kw = { + **default_line_kws, + **line_kw, + **default_ice_lines_kws, + **ice_lines_kw, + } + del ice_lines_kw["label"] - if self.kind == "individual" or self.kind == "both": - individual_line_kw["alpha"] = 0.3 - individual_line_kw["linewidth"] = 0.5 + pd_line_kw = {**default_line_kws, **line_kw, **pd_line_kw} n_features = len(self.features) if self.kind in ("individual", "both"): @@ -998,8 +1059,8 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None): n_cols, pd_plot_idx, n_lines, - individual_line_kw, - line_kw, + ice_lines_kw, + pd_line_kw, ) else: self._plot_two_way_partial_dependence( diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index 25c543d94c3c0..4d33313c8c884 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -687,3 +687,56 @@ def test_partial_dependence_overwrite_labels( legend_text = 
ax.get_legend().get_texts() assert len(legend_text) == 1 assert legend_text[0].get_text() == label + + +@pytest.mark.filterwarnings("ignore:A Bunch will be returned") +@pytest.mark.parametrize( + "line_kw, pd_line_kw, ice_lines_kw, expected_colors", + [ + ({"color": "r"}, {"color": "g"}, {"color": "b"}, ("g", "b")), + (None, {"color": "g"}, {"color": "b"}, ("g", "b")), + ({"color": "r"}, None, {"color": "b"}, ("r", "b")), + ({"color": "r"}, {"color": "g"}, None, ("g", "r")), + ({"color": "r"}, None, None, ("r", "r")), + ({"color": "r"}, {"linestyle": "--"}, {"linestyle": "-."}, ("r", "r")), + ], +) +def test_plot_partial_dependence_lines_kw( + pyplot, + clf_diabetes, + diabetes, + line_kw, + pd_line_kw, + ice_lines_kw, + expected_colors, +): + """Check that passing `pd_line_kw` and `ice_lines_kw` will act on the + specific lines in the plot. + """ + + disp = plot_partial_dependence( + clf_diabetes, + diabetes.data, + [0, 2], + grid_resolution=20, + feature_names=diabetes.feature_names, + n_cols=2, + kind="both", + line_kw=line_kw, + pd_line_kw=pd_line_kw, + ice_lines_kw=ice_lines_kw, + ) + + line = disp.lines_[0, 0, -1] + assert line.get_color() == expected_colors[0] + if pd_line_kw is not None and "linestyle" in pd_line_kw: + assert line.get_linestyle() == pd_line_kw["linestyle"] + else: + assert line.get_linestyle() == "-" + + line = disp.lines_[0, 0, 0] + assert line.get_color() == expected_colors[1] + if ice_lines_kw is not None and "linestyle" in ice_lines_kw: + assert line.get_linestyle() == ice_lines_kw["linestyle"] + else: + assert line.get_linestyle() == "-"