From 6c7c896e7e25b59130f91b453ab8fb3c8909b222 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 4 Apr 2023 11:34:45 +0200 Subject: [PATCH 1/5] PERF set openmp to use only physical cores by default --- sklearn/utils/_openmp_helpers.pyx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/_openmp_helpers.pyx b/sklearn/utils/_openmp_helpers.pyx index 859bc1b5f322c..e04f7795d9841 100644 --- a/sklearn/utils/_openmp_helpers.pyx +++ b/sklearn/utils/_openmp_helpers.pyx @@ -12,7 +12,7 @@ def _openmp_parallelism_enabled(): return SKLEARN_OPENMP_PARALLELISM_ENABLED -cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False): +cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): """Determine the effective number of threads to be used for OpenMP calls - For ``n_threads = None``, @@ -33,6 +33,15 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False): - Raise a ValueError for ``n_threads = 0``. + Passing `only_physical_cores=False` flag makes it possible to use extra + threads for SMT/HyperThreading logical cores. It has been empirically + observed that using as many threads as available SMT cores can slightly + improve the performance in some cases, but can severely degrade + performance other times. Therefore it is recommended to use + `only_physical_cores=True` unless an empirical study has been conducted to + assess the impact of SMT on a case-by-case basis (using various input data + shapes, in particular small data shapes). + If scikit-learn is built without OpenMP support, always return 1. 
""" if n_threads == 0: From 78eca65acc0fdc5cf8a6b948555b4f4f5de9e1d4 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 4 Apr 2023 12:04:44 +0200 Subject: [PATCH 2/5] DOC document the change --- doc/whats_new/v1.3.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 4fede62e61b34..ecc8dcbd7c515 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -126,6 +126,17 @@ Changes impacting all modules :pr:`25044` by :user:`Julien Jerphanion <jjerphan>`. +- |Enhancement| All estimators that internally rely on OpenMP multi-threading + (via Cython) now use a number of threads equal to the number of physical + (instead of logical) cores by default. In the past, we observed that using as + many threads as logical cores on SMT hosts could sometimes cause severe + performance problems depending on the algorithms and the shape of the data. + Note that it is still possible to manually adjust the number of threads used + by OpenMP as documented in :ref:`parallelism`. + + :pr:`26082` by :user:`Jérémie du Boisberranger <jeremiedbb>` and + :user:`Olivier Grisel <ogrisel>`. + Changelog --------- From 6326c9da9f1523d81ad10e1cae8c397247cf28f9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 4 Apr 2023 12:07:07 +0200 Subject: [PATCH 3/5] grammar --- sklearn/utils/_openmp_helpers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_openmp_helpers.pyx b/sklearn/utils/_openmp_helpers.pyx index e04f7795d9841..31e04732a86b8 100644 --- a/sklearn/utils/_openmp_helpers.pyx +++ b/sklearn/utils/_openmp_helpers.pyx @@ -33,7 +33,7 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): - Raise a ValueError for ``n_threads = 0``. - Passing `only_physical_cores=False` flag makes it possible to use extra + Passing the `only_physical_cores=False` flag makes it possible to use extra threads for SMT/HyperThreading logical cores. 
It has been empirically observed that using as many threads as available SMT cores can slightly improve the performance in some cases, but can severely degrade From 2451b1077aa3f3a9459332b91b2a4fde7f24ad48 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 4 Apr 2023 15:25:45 +0200 Subject: [PATCH 4/5] Cache repeated calls to cpu_count. --- sklearn/utils/_openmp_helpers.pyx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/_openmp_helpers.pyx b/sklearn/utils/_openmp_helpers.pyx index e04f7795d9841..2ce1bcf97211b 100644 --- a/sklearn/utils/_openmp_helpers.pyx +++ b/sklearn/utils/_openmp_helpers.pyx @@ -2,6 +2,11 @@ import os from joblib import cpu_count +# Module level cache for cpu_count as we do not expect this to change during +# the lifecycle of a Python program. +_CPU_COUNTS = None + + def _openmp_parallelism_enabled(): """Determines whether scikit-learn has been built with OpenMP @@ -44,6 +49,7 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): If scikit-learn is built without OpenMP support, always return 1. """ + global _CPU_COUNTS if n_threads == 0: raise ValueError("n_threads = 0 is invalid") @@ -56,9 +62,12 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): # to exceed the number of cpus. 
max_n_threads = omp_get_max_threads() else: + if _CPU_COUNTS is None: + _CPU_COUNTS = { + opc: cpu_count(only_physical_cores=opc) for opc in [True, False] + } max_n_threads = min( - omp_get_max_threads(), - cpu_count(only_physical_cores=only_physical_cores) + omp_get_max_threads(), _CPU_COUNTS[only_physical_cores] ) if n_threads is None: From a9ad3095dd971719c42621e5a032fa6282e1d826 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 5 Apr 2023 14:17:45 +0200 Subject: [PATCH 5/5] Initialize the dict at module import time --- sklearn/utils/_openmp_helpers.pyx | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sklearn/utils/_openmp_helpers.pyx b/sklearn/utils/_openmp_helpers.pyx index 2ce1bcf97211b..455743e4232a3 100644 --- a/sklearn/utils/_openmp_helpers.pyx +++ b/sklearn/utils/_openmp_helpers.pyx @@ -3,8 +3,9 @@ from joblib import cpu_count # Module level cache for cpu_count as we do not expect this to change during -# the lifecycle of a Python program. -_CPU_COUNTS = None +# the lifecycle of a Python program. This dictionary is keyed by +# only_physical_cores. +_CPU_COUNTS = {} def _openmp_parallelism_enabled(): @@ -49,7 +50,6 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): If scikit-learn is built without OpenMP support, always return 1. """ - global _CPU_COUNTS if n_threads == 0: raise ValueError("n_threads = 0 is invalid") @@ -62,13 +62,12 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True): # to exceed the number of cpus. 
max_n_threads = omp_get_max_threads() else: - if _CPU_COUNTS is None: - _CPU_COUNTS = { - opc: cpu_count(only_physical_cores=opc) for opc in [True, False] - } - max_n_threads = min( - omp_get_max_threads(), _CPU_COUNTS[only_physical_cores] - ) + try: + n_cpus = _CPU_COUNTS[only_physical_cores] + except KeyError: + n_cpus = cpu_count(only_physical_cores=only_physical_cores) + _CPU_COUNTS[only_physical_cores] = n_cpus + max_n_threads = min(omp_get_max_threads(), n_cpus) if n_threads is None: return max_n_threads