Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f8ca2fd
Add parameters to format ICE and PD lines separately in partial depen…
mhham Feb 10, 2021
3b71e0d
Add warning when line_kw is used in conjunction with pd_line_kw or ic…
mhham Feb 10, 2021
e35187d
Add test to check line formatting behavior in partial dependence plot
mhham Feb 10, 2021
69c33fe
Add PR info to whats_new
mhham Feb 10, 2021
6a21afa
Add call to action to silence line_kw warning
mhham Feb 11, 2021
b2b5d74
Add comment to explain what test does
mhham Feb 11, 2021
41ede4d
Add partial dependence line kwargs to documentation
mhham Feb 11, 2021
281ad24
Add line_kw ice_lines_kw pd_line_kw warnings test
mhham Feb 11, 2021
8ff4183
Formatting + pytest check UserWarning instead of Warning
mhham Feb 11, 2021
0e6001a
Merge branch 'main' into feature/pdp-ice-pd-lines
mhham Feb 25, 2021
c01307d
Apply suggestions from code review: versionadded
mhham Apr 7, 2021
6c2d85f
Add ICE and PDP line formatting to example
mhham Apr 7, 2021
61cca2e
Merge remote-tracking branch 'upstream/main' into feature/pdp-ice-pd-…
mhham Apr 7, 2021
7839d7e
Merge remote-tracking branch 'origin/main' into pr/mhham/19428
glemaitre Jun 29, 2021
438b685
fix whats new
glemaitre Jun 29, 2021
91f7acf
Apply suggestions from code review
glemaitre Jul 9, 2021
bca4719
black
glemaitre Jul 9, 2021
2db0eb1
solve issue warning
glemaitre Jul 9, 2021
724ef4b
Merge remote-tracking branch 'origin/main' into pr/mhham/19428
glemaitre Jul 9, 2021
17cf123
TST check that we don't raise spurious warnings
glemaitre Jul 30, 2021
739f3f9
avoid warning
glemaitre Aug 2, 2021
4a012c5
improve doc
glemaitre Aug 2, 2021
a8c3597
Merge remote-tracking branch 'origin/main' into pr/mhham/19428
glemaitre Aug 2, 2021
c45c98d
Apply suggestions from code review
glemaitre Aug 16, 2021
5d9c8a0
Merge remote-tracking branch 'origin/main' into pr/mhham/19428
glemaitre Aug 16, 2021
15a4288
Update plot_partial_dependence.py
glemaitre Aug 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,11 @@ Changelog
:func:`~sklearn.inspection.permutation_importance`.
:pr:`19411` by :user:`Simona Maggio <simonamaggio>`.

- |Enhancement| Add kwargs to format ICE and PD lines separately in partial
dependence plots :func:`~sklearn.inspection.plot_partial_dependence` and
:meth:`~sklearn.inspection.PartialDependenceDisplay.plot`.
:pr:`19428` by :user:`Mehdi Hamoumi <mhham>`.

:mod:`sklearn.linear_model`
...........................

Expand Down
84 changes: 54 additions & 30 deletions examples/inspection/plot_partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@

y -= y.mean()

X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.1, random_state=0
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# %%
# 1-way partial dependence with different models
Expand All @@ -80,10 +78,12 @@

print("Training MLPRegressor...")
tic = time()
est = make_pipeline(QuantileTransformer(),
MLPRegressor(hidden_layer_sizes=(50, 50),
learning_rate_init=0.01,
early_stopping=True))
est = make_pipeline(
QuantileTransformer(),
MLPRegressor(
hidden_layer_sizes=(50, 50), learning_rate_init=0.01, early_stopping=True
),
)
est.fit(X_train, y_train)
print(f"done in {time() - tic:.3f}s")
print(f"Test R2 score: {est.score(X_test, y_test):.2f}")
Expand Down Expand Up @@ -113,17 +113,25 @@
from sklearn.inspection import partial_dependence
from sklearn.inspection import plot_partial_dependence

print('Computing partial dependence plots...')
print("Computing partial dependence plots...")
tic = time()
features = ['MedInc', 'AveOccup', 'HouseAge', 'AveRooms']
features = ["MedInc", "AveOccup", "HouseAge", "AveRooms"]
display = plot_partial_dependence(
est, X_train, features, kind="both", subsample=50,
n_jobs=3, grid_resolution=20, random_state=0
est,
X_train,
features,
kind="both",
subsample=50,
n_jobs=3,
grid_resolution=20,
random_state=0,
ice_lines_kw={"color": "tab:blue", "alpha": 0.2, "linewidth": 0.5},
pd_line_kw={"color": "tab:orange", "linestyle": "--"},
)
print(f"done in {time() - tic:.3f}s")
display.figure_.suptitle(
'Partial dependence of house value on non-location features\n'
'for the California housing dataset, with MLPRegressor'
"Partial dependence of house value on non-location features\n"
"for the California housing dataset, with MLPRegressor"
)
display.figure_.subplots_adjust(hspace=0.3)

Expand Down Expand Up @@ -156,16 +164,24 @@
# We will plot the partial dependence, both the individual curves (ICE) and the
# averaged one (PDP). We limit the plot to 50 ICE curves to avoid overcrowding it.

print('Computing partial dependence plots...')
print("Computing partial dependence plots...")
tic = time()
display = plot_partial_dependence(
est, X_train, features, kind="both", subsample=50,
n_jobs=3, grid_resolution=20, random_state=0
est,
X_train,
features,
kind="both",
subsample=50,
n_jobs=3,
grid_resolution=20,
random_state=0,
ice_lines_kw={"color": "tab:blue", "alpha": 0.2, "linewidth": 0.5},
pd_line_kw={"color": "tab:orange", "linestyle": "--"},
)
print(f"done in {time() - tic:.3f}s")
display.figure_.suptitle(
'Partial dependence of house value on non-location features\n'
'for the California housing dataset, with Gradient Boosting'
"Partial dependence of house value on non-location features\n"
"for the California housing dataset, with Gradient Boosting"
)
display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)

Expand Down Expand Up @@ -209,18 +225,23 @@
# the tree-based algorithm, when only PDPs are requested, they can be computed
# in an efficient way using the `'recursion'` method.

features = ['AveOccup', 'HouseAge', ('AveOccup', 'HouseAge')]
print('Computing partial dependence plots...')
features = ["AveOccup", "HouseAge", ("AveOccup", "HouseAge")]
print("Computing partial dependence plots...")
tic = time()
_, ax = plt.subplots(ncols=3, figsize=(9, 4))
display = plot_partial_dependence(
est, X_train, features, kind='average', n_jobs=3, grid_resolution=20,
est,
X_train,
features,
kind="average",
n_jobs=3,
grid_resolution=20,
ax=ax,
)
print(f"done in {time() - tic:.3f}s")
display.figure_.suptitle(
'Partial dependence of house value on non-location features\n'
'for the California housing dataset, with Gradient Boosting'
"Partial dependence of house value on non-location features\n"
"for the California housing dataset, with Gradient Boosting"
)
display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)

Expand All @@ -240,24 +261,27 @@

import numpy as np
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()

features = ('AveOccup', 'HouseAge')
features = ("AveOccup", "HouseAge")
pdp = partial_dependence(
est, X_train, features=features, kind='average', grid_resolution=20
est, X_train, features=features, kind="average", grid_resolution=20
)
XX, YY = np.meshgrid(pdp["values"][0], pdp["values"][1])
Z = pdp.average[0].T
ax = Axes3D(fig)
surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
cmap=plt.cm.BuPu, edgecolor='k')
fig.add_axes(ax)
surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu, edgecolor="k")
ax.set_xlabel(features[0])
ax.set_ylabel(features[1])
ax.set_zlabel('Partial dependence')
ax.set_zlabel("Partial dependence")
# pretty init view
ax.view_init(elev=22, azim=122)
plt.colorbar(surf)
plt.suptitle('Partial dependence of house value on median\n'
'age and average occupancy, with Gradient Boosting')
plt.suptitle(
"Partial dependence of house value on median\n"
"age and average occupancy, with Gradient Boosting"
)
plt.subplots_adjust(top=0.9)
plt.show()
97 changes: 79 additions & 18 deletions sklearn/inspection/_plot/partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def plot_partial_dependence(
n_jobs=None,
verbose=0,
line_kw=None,
ice_lines_kw=None,
pd_line_kw=None,
contour_kw=None,
ax=None,
kind="average",
Expand Down Expand Up @@ -185,7 +187,24 @@ def plot_partial_dependence(

line_kw : dict, default=None
Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
For one-way partial dependence plots.
For one-way partial dependence plots. It can be used to define common
properties for both `ice_lines_kw` and `pd_line_kw`.

ice_lines_kw : dict, default=None
Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.
For ICE lines in the one-way partial dependence plots.
The key-value pairs defined in `ice_lines_kw` take priority over
`line_kw`.

.. versionadded:: 1.0

pd_line_kw : dict, default=None
Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.
For partial dependence in one-way partial dependence plots.
The key-value pairs defined in `pd_line_kw` take priority over
`line_kw`.

.. versionadded:: 1.0

contour_kw : dict, default=None
Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.
Expand Down Expand Up @@ -413,7 +432,14 @@ def convert_feature(fx):
subsample=subsample,
random_state=random_state,
)
return display.plot(ax=ax, n_cols=n_cols, line_kw=line_kw, contour_kw=contour_kw)
return display.plot(
ax=ax,
n_cols=n_cols,
line_kw=line_kw,
ice_lines_kw=ice_lines_kw,
pd_line_kw=pd_line_kw,
contour_kw=contour_kw,
)


class PartialDependenceDisplay:
Expand Down Expand Up @@ -675,8 +701,8 @@ def _plot_one_way_partial_dependence(
n_cols,
pd_plot_idx,
n_lines,
individual_line_kw,
line_kw,
ice_lines_kw,
pd_line_kw,
):
"""Plot 1-way partial dependence: ICE and PDP.

Expand Down Expand Up @@ -704,9 +730,9 @@ def _plot_one_way_partial_dependence(
matching 2D position in the grid layout.
n_lines : int
The total number of lines expected to be plotted on the axis.
individual_line_kw : dict
ice_lines_kw : dict
Dict with keywords passed when plotting the ICE lines.
line_kw : dict
pd_line_kw : dict
Dict with keywords passed when plotting the PD plot.
"""
from matplotlib import transforms # noqa
Expand All @@ -719,7 +745,7 @@ def _plot_one_way_partial_dependence(
ax,
pd_plot_idx,
n_lines,
individual_line_kw,
ice_lines_kw,
)

if self.kind in ("average", "both"):
Expand All @@ -733,7 +759,7 @@ def _plot_one_way_partial_dependence(
feature_values,
ax,
pd_line_idx,
line_kw,
pd_line_kw,
)

trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
Expand All @@ -759,7 +785,7 @@ def _plot_one_way_partial_dependence(
else:
ax.set_yticklabels([])

if line_kw.get("label", None) and self.kind != "individual":
if pd_line_kw.get("label", None) and self.kind != "individual":
ax.legend()

def _plot_two_way_partial_dependence(
Expand Down Expand Up @@ -842,7 +868,16 @@ def _plot_two_way_partial_dependence(
ax.set_ylabel(self.feature_names[feature_idx[1]])

@_deprecate_positional_args(version="1.1")
def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):
def plot(
self,
*,
ax=None,
n_cols=3,
line_kw=None,
ice_lines_kw=None,
pd_line_kw=None,
contour_kw=None,
):
"""Plot partial dependence plots.

Parameters
Expand All @@ -865,6 +900,22 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):
Dict with keywords passed to the `matplotlib.pyplot.plot` call.
For one-way partial dependence plots.

ice_lines_kw : dict, default=None
Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.
For ICE lines in the one-way partial dependence plots.
The key-value pairs defined in `ice_lines_kw` take priority over
`line_kw`.

.. versionadded:: 1.0

pd_line_kw : dict, default=None
Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.
For partial dependence in one-way partial dependence plots.
The key-value pairs defined in `pd_line_kw` take priority over
`line_kw`.

.. versionadded:: 1.0

contour_kw : dict, default=None
Dict with keywords passed to the `matplotlib.pyplot.contourf`
call for two-way partial dependence plots.
Expand All @@ -880,6 +931,10 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):

if line_kw is None:
line_kw = {}
if ice_lines_kw is None:
ice_lines_kw = {}
if pd_line_kw is None:
pd_line_kw = {}
if contour_kw is None:
contour_kw = {}

Expand All @@ -893,14 +948,20 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):
"color": "C0",
"label": "average" if self.kind == "both" else None,
}
line_kw = {**default_line_kws, **line_kw}
if self.kind in ("individual", "both"):
default_ice_lines_kws = {"alpha": 0.3, "linewidth": 0.5}
else:
default_ice_lines_kws = {}

individual_line_kw = line_kw.copy()
del individual_line_kw["label"]
ice_lines_kw = {
**default_line_kws,
**line_kw,
**default_ice_lines_kws,
**ice_lines_kw,
}
del ice_lines_kw["label"]

if self.kind == "individual" or self.kind == "both":
individual_line_kw["alpha"] = 0.3
individual_line_kw["linewidth"] = 0.5
pd_line_kw = {**default_line_kws, **line_kw, **pd_line_kw}

n_features = len(self.features)
if self.kind in ("individual", "both"):
Expand Down Expand Up @@ -998,8 +1059,8 @@ def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):
n_cols,
pd_plot_idx,
n_lines,
individual_line_kw,
line_kw,
ice_lines_kw,
pd_line_kw,
)
else:
self._plot_two_way_partial_dependence(
Expand Down
Loading