11 changes: 11 additions & 0 deletions doc/whats_new/v1.2.rst
@@ -9,6 +9,17 @@ Version 1.2.1

**In Development**


Changes impacting all modules
-----------------------------

- |Fix| Fix a bug where the current configuration was ignored in estimators using
`n_jobs > 1`. The bug was triggered for tasks dispatched by the ancillary
thread of `joblib`, because :func:`sklearn.get_config` accessed an empty
thread-local configuration instead of the configuration visible from the thread
where `joblib.Parallel` was first called (a short sketch of the propagation
pattern follows this entry).
:pr:`25290` by :user:`Guillaume Lemaitre <glemaitre>`.
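
The following is a minimal, illustrative sketch of the propagation pattern this fix
relies on: the configuration is captured with :func:`sklearn.get_config` in the thread
that creates the ``joblib.Parallel`` call and re-applied around each dispatched task
with :func:`sklearn.config_context`. The ``_with_config`` and ``fit_one`` helpers below
are hypothetical stand-ins for scikit-learn's internal ``delayed`` wrapper (which, in
the diff below, gains a ``config`` argument for this purpose)::

    from joblib import Parallel, delayed

    from sklearn import config_context, get_config
    from sklearn.preprocessing import StandardScaler

    def _with_config(func, config):
        """Re-apply a configuration captured in the calling thread."""
        def wrapped(*args, **kwargs):
            with config_context(**config):
                return func(*args, **kwargs)
        return wrapped

    def fit_one(estimator, X):
        # Runs in a worker; without propagation it would only see the
        # default global configuration, not the caller's settings.
        return estimator.fit(X)

    X = [[0.0, 1.0], [2.0, 3.0]]
    with config_context(assume_finite=True):
        config = get_config()  # captured where Parallel is first called
        scalers = Parallel(n_jobs=2)(
            delayed(_with_config(fit_one, config))(StandardScaler(), X)
            for _ in range(2)
        )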

Changelog
---------

4 changes: 3 additions & 1 deletion sklearn/calibration.py
@@ -20,6 +20,7 @@
from scipy.special import xlogy
from scipy.optimize import fmin_bfgs

from ._config import get_config
from .base import (
BaseEstimator,
ClassifierMixin,
@@ -392,9 +393,10 @@ def fit(self, X, y, sample_weight=None, **fit_params):
cv = check_cv(self.cv, y, classifier=True)

if self.ensemble:
config = get_config()
parallel = Parallel(n_jobs=self.n_jobs)
self.calibrated_classifiers_ = parallel(
delayed(_fit_classifier_calibrator_pair)(
delayed(_fit_classifier_calibrator_pair, config=config)(
clone(estimator),
X,
y,
7 changes: 5 additions & 2 deletions sklearn/cluster/_mean_shift.py
@@ -27,7 +27,7 @@
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
from ..metrics.pairwise import pairwise_distances_argmin
from .._config import config_context
from .._config import config_context, get_config


@validate_params(
@@ -472,8 +472,11 @@ def fit(self, X, y=None):
nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)

# execute iterations on all seeds in parallel
config = get_config()
all_res = Parallel(n_jobs=self.n_jobs)(
delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)
delayed(_mean_shift_single_seed, config=config)(
seed, X, nbrs, self.max_iter
)
for seed in seeds
)
# copy results in a dictionary
4 changes: 3 additions & 1 deletion sklearn/compose/_column_transformer.py
@@ -14,6 +14,7 @@
from scipy import sparse
from joblib import Parallel

from .._config import get_config
from ..base import clone, TransformerMixin
from ..utils._estimator_html_repr import _VisualBlock
from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
@@ -661,8 +662,9 @@ def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):
)
)
try:
config = get_config()
return Parallel(n_jobs=self.n_jobs)(
delayed(func)(
delayed(func, config=config)(
transformer=clone(trans) if not fitted else trans,
X=_safe_indexing(X, column, axis=1),
y=y,
4 changes: 3 additions & 1 deletion sklearn/covariance/_graph_lasso.py
@@ -17,6 +17,7 @@

from . import empirical_covariance, EmpiricalCovariance, log_likelihood

from .._config import get_config
from ..exceptions import ConvergenceWarning
from ..utils.validation import (
_is_arraylike_not_scalar,
@@ -891,8 +892,9 @@ def fit(self, X, y=None):
# NOTE: Warm-restarting graphical_lasso_path has been tried,
# and this did not allow to gain anything
# (same execution time with or without).
config = get_config()
this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(graphical_lasso_path)(
delayed(graphical_lasso_path, config=config)(
X[train],
alphas=alphas,
X_test=X[test],
4 changes: 3 additions & 1 deletion sklearn/decomposition/_dict_learning.py
@@ -15,6 +15,7 @@
from scipy import linalg
from joblib import Parallel, effective_n_jobs

from .._config import get_config
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
from ..utils import deprecated
@@ -409,8 +410,9 @@ def sparse_encode(
code = np.empty((n_samples, n_components))
slices = list(gen_even_slices(n_samples, effective_n_jobs(n_jobs)))

config = get_config()
code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_sparse_encode)(
delayed(_sparse_encode, config=config)(
X[this_slice],
dictionary,
gram,
4 changes: 3 additions & 1 deletion sklearn/decomposition/_lda.py
@@ -17,6 +17,7 @@
from scipy.special import gammaln, logsumexp
from joblib import Parallel, effective_n_jobs

from .._config import get_config
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_random_state, gen_batches, gen_even_slices
from ..utils.validation import check_non_negative
@@ -457,8 +458,9 @@ def _e_step(self, X, cal_sstats, random_init, parallel=None):
n_jobs = effective_n_jobs(self.n_jobs)
if parallel is None:
parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))
config = get_config()
results = parallel(
delayed(_update_doc_distribution)(
delayed(_update_doc_distribution, config=config)(
X[idx_slice, :],
self.exp_dirichlet_component_,
self.doc_topic_prior_,
16 changes: 11 additions & 5 deletions sklearn/ensemble/_bagging.py
@@ -15,6 +15,7 @@
from joblib import Parallel

from ._base import BaseEnsemble, _partition_estimators
from .._config import get_config
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
@@ -471,10 +472,11 @@ def _fit(
seeds = random_state.randint(MAX_INT, size=n_more_estimators)
self._seeds = seeds

config = get_config()
all_results = Parallel(
n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
)(
delayed(_parallel_build_estimators)(
delayed(_parallel_build_estimators, config=config)(
n_estimators[i],
self,
X,
@@ -864,10 +866,11 @@ def predict_proba(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_proba = Parallel(
n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
)(
delayed(_parallel_predict_proba)(
delayed(_parallel_predict_proba, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -914,8 +917,9 @@ def predict_log_proba(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_predict_log_proba)(
delayed(_parallel_predict_log_proba, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -969,8 +973,9 @@ def decision_function(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_decision_function)(
delayed(_parallel_decision_function, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
@@ -1218,8 +1223,9 @@ def predict(self, X):
# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

config = get_config()
all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_predict_regression)(
delayed(_parallel_predict_regression, config=config)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
28 changes: 18 additions & 10 deletions sklearn/ensemble/_forest.py
@@ -50,6 +50,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
from scipy.sparse import hstack as sparse_hstack
from joblib import Parallel

from .._config import get_config
from ..base import is_classifier
from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin

@@ -263,11 +264,11 @@ def apply(self, X):
return the index of the leaf x ends up in.
"""
X = self._validate_X_predict(X)
results = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(delayed(tree.apply)(X, check_input=False) for tree in self.estimators_)
config = get_config()
results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer="threads")(
delayed(tree.apply, config=config)(X, check_input=False)
for tree in self.estimators_
)

return np.array(results).T

@@ -296,12 +297,13 @@ def decision_path(self, X):
gives the indicator value for the i-th estimator.
"""
X = self._validate_X_predict(X)
config = get_config()
indicators = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(
delayed(tree.decision_path)(X, check_input=False)
delayed(tree.decision_path, config=config)(X, check_input=False)
for tree in self.estimators_
)

@@ -471,12 +473,13 @@ def fit(self, X, y, sample_weight=None):
# that case. However, for joblib 0.12+ we respect any
# parallel_backend contexts set at a higher level,
# since correctness does not rely on using threads.
config = get_config()
trees = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
prefer="threads",
)(
delayed(_parallel_build_trees)(
delayed(_parallel_build_trees, config=config)(
t,
self.bootstrap,
X,
@@ -638,8 +641,9 @@ def feature_importances_(self):
"""
check_is_fitted(self)

config = get_config()
all_importances = Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(getattr)(tree, "feature_importances_")
delayed(getattr, config=config)(tree, "feature_importances_")
for tree in self.estimators_
if tree.tree_.node_count > 1
)
@@ -886,9 +890,12 @@ def predict_proba(self, X):
np.zeros((X.shape[0], j), dtype=np.float64)
for j in np.atleast_1d(self.n_classes_)
]
config = get_config()
lock = threading.Lock()
Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
delayed(_accumulate_prediction)(e.predict_proba, X, all_proba, lock)
delayed(_accumulate_prediction, config=config)(
e.predict_proba, X, all_proba, lock
)
for e in self.estimators_
)

@@ -1007,9 +1014,10 @@ def predict(self, X):
y_hat = np.zeros((X.shape[0]), dtype=np.float64)

# Parallel loop
config = get_config()
lock = threading.Lock()
Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)
delayed(_accumulate_prediction, config=config)(e.predict, X, [y_hat], lock)
for e in self.estimators_
)

9 changes: 7 additions & 2 deletions sklearn/ensemble/_stacking.py
@@ -11,6 +11,7 @@
from joblib import Parallel
import scipy.sparse as sparse

from .._config import get_config
from ..base import clone
from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
from ..base import is_classifier, is_regressor
@@ -207,8 +208,11 @@ def fit(self, X, y, sample_weight=None):
# Fit the base estimators on the whole training data. Those
# base estimators will be used in transform, predict, and
# predict_proba. They are exposed publicly.
config = get_config()
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)
delayed(_fit_single_estimator, config=config)(
clone(est), X, y, sample_weight
)
for est in all_estimators
if est != "drop"
)
@@ -247,11 +251,12 @@ def fit(self, X, y, sample_weight=None):
if hasattr(cv, "random_state") and cv.random_state is None:
cv.random_state = np.random.RandomState()

config = get_config()
fit_params = (
{"sample_weight": sample_weight} if sample_weight is not None else None
)
predictions = Parallel(n_jobs=self.n_jobs)(
delayed(cross_val_predict)(
delayed(cross_val_predict, config=config)(
clone(est),
X,
y,
4 changes: 3 additions & 1 deletion sklearn/ensemble/_voting.py
@@ -20,6 +20,7 @@

from joblib import Parallel

from .._config import get_config
from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
@@ -80,8 +81,9 @@ def fit(self, X, y, sample_weight=None):
f" {len(self.weights)} weights, {len(self.estimators)} estimators"
)

config = get_config()
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_single_estimator)(
delayed(_fit_single_estimator, config=config)(
clone(clf),
X,
y,
8 changes: 8 additions & 0 deletions sklearn/exceptions.py
@@ -5,6 +5,7 @@

__all__ = [
"NotFittedError",
"ConfigPropagationWarning",
"ConvergenceWarning",
"DataConversionWarning",
"DataDimensionalityWarning",
@@ -38,6 +39,13 @@ class NotFittedError(ValueError, AttributeError):
"""


class ConfigPropagationWarning(UserWarning):
"""Notify about lack of config propagation to the child processes.

.. versionadded:: 1.3
"""


class ConvergenceWarning(UserWarning):
"""Custom warning to capture convergence problems

5 changes: 3 additions & 2 deletions sklearn/feature_selection/_rfe.py
@@ -10,7 +10,7 @@
from numbers import Integral, Real
from joblib import Parallel, effective_n_jobs


from .._config import get_config
from ..utils.metaestimators import available_if
from ..utils.metaestimators import _safe_split
from ..utils._param_validation import HasMethods, Interval
@@ -719,8 +719,9 @@ def fit(self, X, y, groups=None):
if effective_n_jobs(self.n_jobs) == 1:
parallel, func = list, _rfe_single_fit
else:
config = get_config()
parallel = Parallel(n_jobs=self.n_jobs)
func = delayed(_rfe_single_fit)
func = delayed(_rfe_single_fit, config=config)

scores = parallel(
func(rfe, self.estimator, X, y, train, test, scorer)