11 changes: 11 additions & 0 deletions doc/whats_new/v1.2.rst
@@ -9,6 +9,17 @@ Version 1.2.1

**In Development**


Changes impacting all modules
-----------------------------

- |Fix| Fix a bug where the current configuration was ignored in estimators using
`n_jobs > 1`. The bug was triggered for tasks dispatched by the ancillary
thread of `joblib`, because :func:`sklearn.get_config` accessed an empty
thread-local configuration instead of the configuration visible from the thread
where `joblib.Parallel` was first called (a short sketch of the propagation
pattern follows this entry).
:pr:`25290` by :user:`Guillaume Lemaitre <glemaitre>`.
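
The following is a minimal, illustrative sketch of the propagation pattern this fix
relies on: the configuration is captured with :func:`sklearn.get_config` in the thread
that creates the ``joblib.Parallel`` call and re-applied around each dispatched task
with :func:`sklearn.config_context`. The ``_with_config`` and ``fit_one`` helpers below
are hypothetical stand-ins for scikit-learn's internal ``delayed`` wrapper (which, in
the diff below, gains a ``config`` argument for this purpose)::

    from joblib import Parallel, delayed

    from sklearn import config_context, get_config
    from sklearn.preprocessing import StandardScaler

    def _with_config(func, config):
        """Re-apply a configuration captured in the calling thread."""
        def wrapped(*args, **kwargs):
            with config_context(**config):
                return func(*args, **kwargs)
        return wrapped

    def fit_one(estimator, X):
        # Runs in a worker; without propagation it would only see the
        # default global configuration, not the caller's settings.
        return estimator.fit(X)

    X = [[0.0, 1.0], [2.0, 3.0]]
    with config_context(assume_finite=True):
        config = get_config()  # captured where Parallel is first called
        scalers = Parallel(n_jobs=2)(
            delayed(_with_config(fit_one, config))(StandardScaler(), X)
            for _ in range(2)
        )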

Changelog
---------

4 changes: 3 additions & 1 deletion sklearn/calibration.py
@@ -20,6 +20,7 @@
from scipy.special import xlogy
from scipy.optimize import fmin_bfgs

from ._config import get_config
from .base import (
BaseEstimator,
ClassifierMixin,
@@ -392,9 +393,10 @@ def fit(self, X, y, sample_weight=None, **fit_params):
cv = check_cv(self.cv, y, classifier=True)

if self.ensemble:
config = get_config()
parallel = Parallel(n_jobs=self.n_jobs)
self.calibrated_classifiers_ = parallel(
delayed(_fit_classifier_calibrator_pair)(
delayed(_fit_classifier_calibrator_pair, config=config)(
clone(estimator),
X,
y,
7 changes: 5 additions & 2 deletions sklearn/cluster/_mean_shift.py
@@ -27,7 +27,7 @@
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
from ..metrics.pairwise import pairwise_distances_argmin
from .._config import config_context
from .._config import config_context, get_config


@validate_params(
@@ -472,8 +472,11 @@ def fit(self, X, y=None):
nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)

# execute iterations on all seeds in parallel
config = get_config()
all_res = Parallel(n_jobs=self.n_jobs)(
delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)
delayed(_mean_shift_single_seed, config=config)(
seed, X, nbrs, self.max_iter
)
for seed in seeds
)
# copy results in a dictionary
4 changes: 3 additions & 1 deletion sklearn/compose/_column_transformer.py
@@ -14,6 +14,7 @@
from scipy import sparse
from joblib import Parallel

from .._config import get_config
from ..base import clone, TransformerMixin
from ..utils._estimator_html_repr import _VisualBlock
from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
@@ -661,8 +662,9 @@ def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):
)
)
try:
config = get_config()
return Parallel(n_jobs=self.n_jobs)(
delayed(func)(
delayed(func, config=config)(
transformer=clone(trans) if not fitted else trans,
X=_safe_indexing(X, column, axis=1),
y=y,
4 changes: 3 additions & 1 deletion sklearn/covariance/_graph_lasso.py
@@ -17,6 +17,7 @@

from . import empirical_covariance, EmpiricalCovariance, log_likelihood

from .._config import get_config
from ..exceptions import ConvergenceWarning
from ..utils.validation import (
_is_arraylike_not_scalar,
@@ -891,8 +892,9 @@ def fit(self, X, y=None):
# NOTE: Warm-restarting graphical_lasso_path has been tried,
# and this did not allow to gain anything
# (same execution time with or without).
config = get_config()
this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(graphical_lasso_path)(
delayed(graphical_lasso_path, config=config)(
X[train],
alphas=alphas,
X_test=X[test],
4 changes: 3 additions & 1 deletion sklearn/decomposition/_dict_learning.py
@@ -15,6 +15,7 @@
from scipy import linalg
from joblib import Parallel, effective_n_jobs

from .._config import get_config
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
from ..utils import deprecated
@@ -409,8 +410,9 @@ def sparse_encode(
code = np.empty((n_samples, n_components))
slices = list(gen_even_slices(n_samples, effective_n_jobs(n_jobs)))

config = get_config()
code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_sparse_encode)(
delayed(_sparse_encode, config=config)(
X[this_slice],
dictionary,
gram,
4 changes: 3 additions & 1 deletion sklearn/decomposition/_lda.py
@@ -17,6 +17,7 @@
from scipy.special import gammaln, logsumexp
from joblib import Parallel, effective_n_jobs

from .._config import get_config
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_random_state, gen_batches, gen_even_slices
from ..utils.validation import check_non_negative
@@ -457,8 +458,9 @@ def _e_step(self, X, cal_sstats, random_init, parallel=None):
n_jobs = effective_n_jobs(self.n_jobs)
if parallel is None:
parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))
config = get_config()
results = parallel(
delayed(_update_doc_distribution)(
delayed(_update_doc_distribution, config=config)(
X[idx_slice, :],
self.exp_dirichlet_component_,
self.doc_topic_prior_,
16 changes: 11 additions & 5 deletions sklearn/ensemble/_bagging.py
@@ -15,6 +15,7 @@
from joblib import Parallel

from ._base import BaseEnsemble, _partition_estimators
from .._config import get_config
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
@@ -471,10 +472,11 @@ def _fit(
seeds = random_state.randint(MAX_INT, size=n_more_estimators)
self._seeds = seeds

config = get_config()
all_results = Parallel(
n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
)(
delayed(_parallel_build_estimators)(
delayed(_parallel_build_estimators, config=config)(
n_estimators[i],
self,
X,
@@ -864,10 +866,11 @@ def predict_proba(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_proba = Parallel(
n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
)(
delayed(_parallel_predict_proba)(
delayed(_parallel_predict_proba, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -914,8 +917,9 @@ def predict_log_proba(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_predict_log_proba)(
delayed(_parallel_predict_log_proba, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -969,8 +973,9 @@ def decision_function(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_decision_function)(
delayed(_parallel_decision_function, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -1218,8 +1223,9 @@ def predict(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_predict_regression)(
delayed(_parallel_predict_regression, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
28 changes: 18 additions & 10 deletions sklearn/ensemble/_forest.py
@@ -50,6 +50,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
from scipy.sparse import hstack as sparse_hstack
from joblib import Parallel

from .._config import get_config
from ..base import is_classifier
from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin

@@ -263,11 +264,11 @@ def apply(self, X):
return the index of the leaf x ends up in.
"""
X = self._validate_X_predict(X)
results = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(delayed(tree.apply)(X, check_input=False) for tree in self.estimators_)
config = get_config()
results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer="threads")(
delayed(tree.apply, config=config)(X, check_input=False)
for tree in self.estimators_
)

return np.array(results).T

@@ -296,12 +297,13 @@ def decision_path(self, X):
gives the indicator value for the i-th estimator.
"""
X = self._validate_X_predict(X)
config = get_config()
indicators = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(
delayed(tree.decision_path)(X, check_input=False)
delayed(tree.decision_path, config=config)(X, check_input=False)
for tree in self.estimators_
)

@@ -471,12 +473,13 @@ def fit(self, X, y, sample_weight=None):
# that case. However, for joblib 0.12+ we respect any
# parallel_backend contexts set at a higher level,
# since correctness does not rely on using threads.
config = get_config()
trees = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(
delayed(_parallel_build_trees)(
delayed(_parallel_build_trees, config=config)(
t,
self.bootstrap,
X,
@@ -638,8 +641,9 @@ def feature_importances_(self):
"""
check_is_fitted(self)

config = get_config()
all_importances = Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(getattr)(tree, "feature_importances_")
delayed(getattr, config=config)(tree, "feature_importances_")
for tree in self.estimators_
if tree.tree_.node_count > 1
)
@@ -886,9 +890,12 @@ def predict_proba(self, X):
np.zeros((X.shape[0], j), dtype=np.float64)
for j in np.atleast_1d(self.n_classes_)
]
config = get_config()
lock = threading.Lock()
Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
delayed(_accumulate_prediction)(e.predict_proba, X, all_proba, lock)
delayed(_accumulate_prediction, config=config)(
e.predict_proba, X, all_proba, lock
)
for e in self.estimators_
)

@@ -1007,9 +1014,10 @@ def predict(self, X):
y_hat = np.zeros((X.shape[0]), dtype=np.float64)

# Parallel loop
config = get_config()
lock = threading.Lock()
Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)
delayed(_accumulate_prediction, config=config)(e.predict, X, [y_hat], lock)
for e in self.estimators_
)

9 changes: 7 additions & 2 deletions sklearn/ensemble/_stacking.py
@@ -11,6 +11,7 @@
from joblib import Parallel
import scipy.sparse as sparse

from .._config import get_config
from ..base import clone
from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
from ..base import is_classifier, is_regressor
@@ -207,8 +208,11 @@ def fit(self, X, y, sample_weight=None):
# Fit the base estimators on the whole training data. Those
# base estimators will be used in transform, predict, and
# predict_proba. They are exposed publicly.
config = get_config()
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)
delayed(_fit_single_estimator, config=config)(
clone(est), X, y, sample_weight
)
for est in all_estimators
if est != "drop"
)
@@ -247,11 +251,12 @@ def fit(self, X, y, sample_weight=None):
if hasattr(cv, "random_state") and cv.random_state is None:
cv.random_state = np.random.RandomState()

config = get_config()
fit_params = (
{"sample_weight": sample_weight} if sample_weight is not None else None
)
predictions = Parallel(n_jobs=self.n_jobs)(
delayed(cross_val_predict)(
delayed(cross_val_predict, config=config)(
clone(est),
X,
y,
4 changes: 3 additions & 1 deletion sklearn/ensemble/_voting.py
@@ -20,6 +20,7 @@

from joblib import Parallel

from .._config import get_config
from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
@@ -80,8 +81,9 @@ def fit(self, X, y, sample_weight=None):
f" {len(self.weights)} weights, {len(self.estimators)} estimators"
)

config = get_config()
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_single_estimator)(
delayed(_fit_single_estimator, config=config)(
clone(clf),
X,
y,
8 changes: 8 additions & 0 deletions sklearn/exceptions.py
@@ -5,6 +5,7 @@

__all__ = [
"NotFittedError",
"ConfigPropagationWarning",
"ConvergenceWarning",
"DataConversionWarning",
"DataDimensionalityWarning",
@@ -38,6 +39,13 @@ class NotFittedError(ValueError, AttributeError):
"""


class ConfigPropagationWarning(UserWarning):
"""Notify about lack of config propagation to the child processes.

.. versionadded:: 1.3
"""


class ConvergenceWarning(UserWarning):
"""Custom warning to capture convergence problems

5 changes: 3 additions & 2 deletions sklearn/feature_selection/_rfe.py
@@ -10,7 +10,7 @@
from numbers import Integral, Real
from joblib import Parallel, effective_n_jobs


from .._config import get_config
from ..utils.metaestimators import available_if
from ..utils.metaestimators import _safe_split
from ..utils._param_validation import HasMethods, Interval
@@ -719,8 +719,9 @@ def fit(self, X, y, groups=None):
if effective_n_jobs(self.n_jobs) == 1:
parallel, func = list, _rfe_single_fit
else:
config = get_config()
parallel = Parallel(n_jobs=self.n_jobs)
func = delayed(_rfe_single_fit)
func = delayed(_rfe_single_fit, config=config)

scores = parallel(
func(rfe, self.estimator, X, y, train, test, scorer)