From dfe19bfd8aa21b4a44fa734d0ad630536a5b8518 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 23 Feb 2022 17:54:31 +0100
Subject: [PATCH 1/7] MAINT Do not propagate n_jobs as n_threads

`n_jobs` sets the number of joblib jobs; it should not also be used as
the number of OpenMP threads.
---
 sklearn/neighbors/_base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py
index 5f8f9966ae349..c2afadebf6044 100644
--- a/sklearn/neighbors/_base.py
+++ b/sklearn/neighbors/_base.py
@@ -764,7 +764,6 @@ class from an array representing our data set and ask who's
                 k=n_neighbors,
                 metric=self.effective_metric_,
                 metric_kwargs=self.effective_metric_params_,
-                n_threads=self.n_jobs,
                 strategy="auto",
                 return_distance=return_distance,
             )

From 23ebeee6bedda72be1fb5d4e95ed309c46cccce1 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 23 Feb 2022 18:25:03 +0100
Subject: [PATCH 2/7] MAINT Do not expose n_threads

It is not needed (yet), hence let's just remove it.
---
 .../metrics/_pairwise_distances_reduction.pyx | 34 +------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction.pyx b/sklearn/metrics/_pairwise_distances_reduction.pyx
index df0918bb61334..7dae016b2334d 100644
--- a/sklearn/metrics/_pairwise_distances_reduction.pyx
+++ b/sklearn/metrics/_pairwise_distances_reduction.pyx
@@ -112,14 +112,6 @@ cdef class PairwiseDistancesReduction:
         scikit-learn configuration for `pairwise_dist_chunk_size`,
         and use 256 if it is not set.
 
-    n_threads: int, default=None
-        The number of OpenMP threads to use for the reduction.
-        Parallelism is done on chunks and the sharding of chunks
-        depends on the `strategy` set on :method:`~PairwiseDistancesReduction.compute`.
-
-        See _openmp_effective_n_threads, for details about
-        the specification of n_threads.
-
     strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None
         The chunking strategy defining which dataset parallelization are made on.
 
@@ -220,7 +212,6 @@ cdef class PairwiseDistancesReduction:
         self,
         DatasetsPair datasets_pair,
         chunk_size=None,
-        n_threads=None,
         strategy=None,
      ):
         cdef:
@@ -231,7 +222,7 @@ cdef class PairwiseDistancesReduction:
 
         self.chunk_size = check_scalar(chunk_size, "chunk_size", Integral, min_val=20)
 
-        self.effective_n_threads = _openmp_effective_n_threads(n_threads)
+        self.effective_n_threads = _openmp_effective_n_threads()
 
         self.datasets_pair = datasets_pair
 
@@ -512,15 +503,6 @@ cdef class PairwiseDistancesArgKmin(PairwiseDistancesReduction):
         scikit-learn configuration for `pairwise_dist_chunk_size`,
         and use 256 if it is not set.
 
-    n_threads: int, default=None
-        The number of OpenMP threads to use for the reduction.
-        Parallelism is done on chunks and the sharding of chunks
-        depends on the `strategy` set on
-        :meth:`~PairwiseDistancesArgKmin.compute`.
-
-        See _openmp_effective_n_threads, for details about
-        the specification of n_threads.
-
     k: int, default=1
         The k for the argkmin reduction.
     """
@@ -544,7 +526,6 @@ cdef class PairwiseDistancesArgKmin(PairwiseDistancesReduction):
         str metric="euclidean",
         chunk_size=None,
         dict metric_kwargs=None,
-        n_threads=None,
         str strategy=None,
         bint return_distance=False,
     ):
@@ -574,15 +555,6 @@ cdef class PairwiseDistancesArgKmin(PairwiseDistancesReduction):
         metric_kwargs : dict, default=None
             Keyword arguments to pass to specified metric function.
 
-        n_threads : int, default=None
-            The number of OpenMP threads to use for the reduction.
-            Parallelism is done on chunks and the sharding of chunks
-            depends on the `strategy` set on
-            :meth:`~PairwiseDistancesArgKmin.compute`.
-
-            See _openmp_effective_n_threads, for details about
-            the specification of n_threads.
-
         strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None
             The chunking strategy defining which dataset parallelization are made on.
 
@@ -686,14 +658,12 @@ cdef class PairwiseDistancesArgKmin(PairwiseDistancesReduction):
         self,
         DatasetsPair datasets_pair,
         chunk_size=None,
-        n_threads=None,
         strategy=None,
         ITYPE_t k=1,
     ):
         super().__init__(
             datasets_pair=datasets_pair,
             chunk_size=chunk_size,
-            n_threads=n_threads,
             strategy=strategy,
         )
         self.k = check_scalar(k, "k", Integral, min_val=1)
@@ -932,7 +902,6 @@ cdef class FastEuclideanPairwiseDistancesArgKmin(PairwiseDistancesArgKmin):
         ITYPE_t k,
         bint use_squared_distances=False,
         chunk_size=None,
-        n_threads=None,
         strategy=None,
         metric_kwargs=None,
     ):
@@ -948,7 +917,6 @@ cdef class FastEuclideanPairwiseDistancesArgKmin(PairwiseDistancesArgKmin):
             # The datasets pair here is used for exact distances computations
             datasets_pair=DatasetsPair.get_for(X, Y, metric="euclidean"),
             chunk_size=chunk_size,
-            n_threads=n_threads,
             strategy=strategy,
             k=k,
         )

From ec640206cfe301050c271ffd37d92a792faf8221 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 23 Feb 2022 18:26:23 +0100
Subject: [PATCH 3/7] DOC Add remark for n_jobs regarding low-level parallelism

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/neighbors/_classification.py | 4 ++++
 sklearn/neighbors/_regression.py     | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py
index bcad8c71aee07..890405da86222 100644
--- a/sklearn/neighbors/_classification.py
+++ b/sklearn/neighbors/_classification.py
@@ -84,6 +84,8 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase):
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
         Doesn't affect :meth:`fit` method.
+        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------
@@ -381,6 +383,8 @@ class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, Neighbors
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
+        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     **kwargs : dict
         Additional keyword arguments passed to the constructor.
diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py
index 1bc02fedba212..4ed398ba9167d 100644
--- a/sklearn/neighbors/_regression.py
+++ b/sklearn/neighbors/_regression.py
@@ -91,6 +91,8 @@ class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase):
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
         Doesn't affect :meth:`fit` method.
+        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------
@@ -316,6 +318,8 @@ class RadiusNeighborsRegressor(RadiusNeighborsMixin, RegressorMixin, NeighborsBa
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
+        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------

From fb199156399db987ffcb59bd6ad76d2e7a5f144a Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 23 Feb 2022 18:28:51 +0100
Subject: [PATCH 4/7] TST Remove unneeded test

---
 .../test_pairwise_distances_reduction.py      | 42 -------------------
 1 file changed, 42 deletions(-)

diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
index b9f3d7dbf3dd5..b84984dc25cf6 100644
--- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py
+++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
@@ -194,48 +194,6 @@ def test_chunk_size_agnosticism(
     ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices, indices)
 
 
-@pytest.mark.parametrize("seed", range(5))
-@pytest.mark.parametrize("n_samples", [100, 1000])
-@pytest.mark.parametrize("chunk_size", [50, 512, 1024])
-@pytest.mark.parametrize(
-    "PairwiseDistancesReduction",
-    [PairwiseDistancesArgKmin],
-)
-def test_n_threads_agnosticism(
-    PairwiseDistancesReduction,
-    seed,
-    n_samples,
-    chunk_size,
-    n_features=100,
-    dtype=np.float64,
-):
-    # Results should not depend on the number of threads
-    rng = np.random.RandomState(seed)
-    spread = 100
-    X = rng.rand(n_samples, n_features).astype(dtype) * spread
-    Y = rng.rand(n_samples, n_features).astype(dtype) * spread
-
-    parameter = (
-        10
-        if PairwiseDistancesReduction is PairwiseDistancesArgKmin
-        # Scaling the radius slightly with the numbers of dimensions
-        else 10 ** np.log(n_features)
-    )
-
-    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
-        X,
-        Y,
-        parameter,
-        return_distance=True,
-    )
-
-    dist, indices = PairwiseDistancesReduction.compute(
-        X, Y, parameter, n_threads=1, return_distance=True
-    )
-
-    ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices, indices)
-
-
 # TODO: Remove filterwarnings in 1.3 when wminkowski is removed
 @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("seed", range(5))

From 8df2d7ef8442fddce1897541a8c7333abe8cbeba Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 24 Feb 2022 10:22:12 +0100
Subject: [PATCH 5/7] TST Use threadpoolctl.threadpool_limits config context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

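This reverts commit fb199156399db987ffcb59bd6ad76d2e7a5f144a and adapts
the restored test to limit OpenMP threads with
`threadpoolctl.threadpool_limits` instead of the removed `n_threads`
argument.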

Co-authored-by: Jérémie du Boisberranger <jeremiedbb@users.noreply.github.com>
---
 .../test_pairwise_distances_reduction.py      | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
index b84984dc25cf6..d2af7e7988aa8 100644
--- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py
+++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pytest
+import threadpoolctl
 from numpy.testing import assert_array_equal, assert_allclose
 from scipy.sparse import csr_matrix
 from scipy.spatial.distance import cdist
@@ -194,6 +195,49 @@ def test_chunk_size_agnosticism(
     ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices, indices)
 
 
+@pytest.mark.parametrize("seed", range(5))
+@pytest.mark.parametrize("n_samples", [100, 1000])
+@pytest.mark.parametrize("chunk_size", [50, 512, 1024])
+@pytest.mark.parametrize(
+    "PairwiseDistancesReduction",
+    [PairwiseDistancesArgKmin],
+)
+def test_n_threads_agnosticism(
+    PairwiseDistancesReduction,
+    seed,
+    n_samples,
+    chunk_size,
+    n_features=100,
+    dtype=np.float64,
+):
+    # Results should not depend on the number of threads
+    rng = np.random.RandomState(seed)
+    spread = 100
+    X = rng.rand(n_samples, n_features).astype(dtype) * spread
+    Y = rng.rand(n_samples, n_features).astype(dtype) * spread
+
+    parameter = (
+        10
+        if PairwiseDistancesReduction is PairwiseDistancesArgKmin
+        # Scaling the radius slightly with the numbers of dimensions
+        else 10 ** np.log(n_features)
+    )
+
+    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
+        X,
+        Y,
+        parameter,
+        return_distance=True,
+    )
+
+    with threadpoolctl.threadpool_limits(limits=1, user_api="openmp"):
+        dist, indices = PairwiseDistancesReduction.compute(
+            X, Y, parameter, return_distance=True
+        )
+
+    ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices, indices)
+
+
 # TODO: Remove filterwarnings in 1.3 when wminkowski is removed
 @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("seed", range(5))

From 8accf40d6f4009c581f6407369aed612a5e1444b Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 24 Feb 2022 10:33:07 +0100
Subject: [PATCH 6/7] DOC Move remark to the glossary

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 doc/glossary.rst                     | 5 ++++-
 sklearn/neighbors/_classification.py | 4 ----
 sklearn/neighbors/_regression.py     | 4 ----
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index f6b93179ca21c..abd60241568a9 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1555,9 +1555,12 @@ functions or non-estimator constructors.
         generally be interpreted as ``n_jobs=1``, unless the current
         :class:`joblib.Parallel` backend context specifies otherwise.
 
+        Note that even if ``n_jobs=1``, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configuration.
+
         For more details on the use of ``joblib`` and its interactions with
         scikit-learn, please refer to our :ref:`parallelism notes
-        <parallelism>`.
+        <parallelism>` and :ref:`User Guide <computing>`.
 
     ``pos_label``
         Value with which positive labels must be encoded in binary
diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py
index 890405da86222..bcad8c71aee07 100644
--- a/sklearn/neighbors/_classification.py
+++ b/sklearn/neighbors/_classification.py
@@ -84,8 +84,6 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase):
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
         Doesn't affect :meth:`fit` method.
-        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
-        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------
@@ -383,8 +381,6 @@ class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, Neighbors
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
-        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     **kwargs : dict
         Additional keyword arguments passed to the constructor.
diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py
index 4ed398ba9167d..1bc02fedba212 100644
--- a/sklearn/neighbors/_regression.py
+++ b/sklearn/neighbors/_regression.py
@@ -91,8 +91,6 @@ class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase):
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
         Doesn't affect :meth:`fit` method.
-        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
-        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------
@@ -318,8 +316,6 @@ class RadiusNeighborsRegressor(RadiusNeighborsMixin, RegressorMixin, NeighborsBa
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-        Note that even if n_jobs=1, low-level parallelism (via Numpy and OpenMP)
-        might be used in some configuration. See :ref:`User Guide <computing>`.
 
     Attributes
     ----------

From 02976a7a8e8b84379fb617a5e3e868e3a5ce431a Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 24 Feb 2022 18:04:49 +0100
Subject: [PATCH 7/7] DOC Do not link to the 'Computing' section

---
 doc/glossary.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/glossary.rst b/doc/glossary.rst
index abd60241568a9..ed5d3ad5e16e8 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1560,7 +1560,7 @@ functions or non-estimator constructors.
 
         For more details on the use of ``joblib`` and its interactions with
         scikit-learn, please refer to our :ref:`parallelism notes
-        <parallelism>` and :ref:`User Guide <computing>`.
+        <parallelism>`.
 
     ``pos_label``
         Value with which positive labels must be encoded in binary