From b04a9f794b48f4df9e85cb3a0035d574a60dcaff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 22 Jan 2025 12:09:45 +0100 Subject: [PATCH 01/92] wip --- gmm-array-api.py | 52 ++++++++++++++++++++++++++++++++++++++++ sklearn/mixture/_base.py | 30 ++++++++++++----------- 2 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 gmm-array-api.py diff --git a/gmm-array-api.py b/gmm-array-api.py new file mode 100644 index 0000000000000..ca4145e5ace23 --- /dev/null +++ b/gmm-array-api.py @@ -0,0 +1,52 @@ +# %% +from sklearn.mixture import GaussianMixture +from sklearn.datasets import make_blobs +import sklearn +import numpy as np +import torch + +import os +os.environ['SCIPY_ARRAY_API'] = '1' + +X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) +X, y = torch.asarray(X), torch.asarray(y) + +sklearn.set_config(array_api_dispatch=True) + +gmm = GaussianMixture(n_components=3, covariance_type="full", random_state=0, init_params="random").fit(X) +print(gmm.means_) +print(gmm.covariances_) + +# %% +import matplotlib.pyplot as plt +import matplotlib as mpl +fig, ax = plt.subplots() + +ax.scatter(X[:, 0], X[:, 1], c=y) + + +def make_ellipses(gmm, ax): + colors = ["navy", "turquoise", "darkorange"] + for n, color in enumerate(colors): + if gmm.covariance_type == "full": + covariances = gmm.covariances_[n][:2, :2] + elif gmm.covariance_type == "tied": + covariances = gmm.covariances_[:2, :2] + elif gmm.covariance_type == "diag": + covariances = np.diag(gmm.covariances_[n][:2]) + elif gmm.covariance_type == "spherical": + covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n] + v, w = np.linalg.eigh(covariances) + u = w[0] / np.linalg.norm(w[0]) + angle = np.arctan2(u[1], u[0]) + angle = 180 * angle / np.pi # convert to degrees + v = 2.0 * np.sqrt(2.0) * np.sqrt(v) + ell = mpl.patches.Ellipse( + gmm.means_[n, :2], v[0], v[1], angle=180 + angle, color=color + ) + ell.set_clip_box(ax.bbox) + ell.set_alpha(0.5) + ax.add_artist(ell) + ax.set_aspect("equal", "datalim") + +make_ellipses(gmm, ax) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index dd50d39b4fdb0..232077d81a840 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -18,6 +18,7 @@ from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data +from ..utils._array_api import get_namespace def _check_shape(param, param_shape, name): @@ -95,7 +96,7 @@ def _check_parameters(self, X): """ pass - def _initialize_parameters(self, X, random_state): + def _initialize_parameters(self, X, random_state, xp): """Initialize the model parameters. 
Parameters @@ -109,7 +110,7 @@ def _initialize_parameters(self, X, random_state): n_samples, _ = X.shape if self.init_params == "kmeans": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) label = ( cluster.KMeans( n_clusters=self.n_components, n_init=1, random_state=random_state @@ -117,26 +118,26 @@ def _initialize_parameters(self, X, random_state): .fit(X) .labels_ ) - resp[np.arange(n_samples), label] = 1 + resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - resp = np.asarray( + resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype ) - resp /= resp.sum(axis=1)[:, np.newaxis] + resp /= resp.sum(axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - resp[indices, np.arange(self.n_components)] = 1 + resp[indices, xp.arange(self.n_components)] = 1 elif self.init_params == "k-means++": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( X, self.n_components, random_state=random_state, ) - resp[indices, np.arange(self.n_components)] = 1 + resp[indices, xp.arange(self.n_components)] = 1 self._initialize(X, resp) @@ -210,7 +211,8 @@ def fit_predict(self, X, y=None): labels : array, shape (n_samples,) Component labels. """ - X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_min_samples=2) + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], ensure_min_samples=2) if X.shape[0] < self.n_components: raise ValueError( "Expected n_samples >= n_components " @@ -223,7 +225,7 @@ def fit_predict(self, X, y=None): do_init = not (self.warm_start and hasattr(self, "converged_")) n_init = self.n_init if do_init else 1 - max_lower_bound = -np.inf + max_lower_bound = -xp.inf self.converged_ = False random_state = check_random_state(self.random_state) @@ -233,9 +235,9 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_beg(init) if do_init: - self._initialize_parameters(X, random_state) + self._initialize_parameters(X, random_state, xp) - lower_bound = -np.inf if do_init else self.lower_bound_ + lower_bound = -xp.inf if do_init else self.lower_bound_ if self.max_iter == 0: best_params = self._get_parameters() @@ -258,7 +260,7 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_end(lower_bound, converged) - if lower_bound > max_lower_bound or max_lower_bound == -np.inf: + if lower_bound > max_lower_bound or max_lower_bound == -xp.inf: max_lower_bound = lower_bound best_params = self._get_parameters() best_n_iter = n_iter From e6ba4e4f5d467aa39f7c8d88e5c2af3ebc06f17c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 22 Jan 2025 16:15:46 +0100 Subject: [PATCH 02/92] wip --- gmm-array-api.py | 20 +++++++++++++------- sklearn/mixture/_base.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index ca4145e5ace23..b87b4e819555a 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,25 +1,30 @@ # %% -from sklearn.mixture import GaussianMixture -from sklearn.datasets import make_blobs -import sklearn +import os + import numpy as np import torch -import os -os.environ['SCIPY_ARRAY_API'] = 
'1' +import sklearn +from sklearn.datasets import make_blobs +from sklearn.mixture import GaussianMixture + +os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) X, y = torch.asarray(X), torch.asarray(y) sklearn.set_config(array_api_dispatch=True) -gmm = GaussianMixture(n_components=3, covariance_type="full", random_state=0, init_params="random").fit(X) +gmm = GaussianMixture( + n_components=3, covariance_type="full", random_state=0, init_params="random" +).fit(X) print(gmm.means_) print(gmm.covariances_) # %% -import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.pyplot as plt + fig, ax = plt.subplots() ax.scatter(X[:, 0], X[:, 1], c=y) @@ -49,4 +54,5 @@ def make_ellipses(gmm, ax): ax.add_artist(ell) ax.set_aspect("equal", "datalim") + make_ellipses(gmm, ax) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 232077d81a840..444f25b0512ba 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -16,9 +16,9 @@ from ..cluster import kmeans_plusplus from ..exceptions import ConvergenceWarning from ..utils import check_random_state +from ..utils._array_api import get_namespace from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data -from ..utils._array_api import get_namespace def _check_shape(param, param_shape, name): From 2226a55f1acb5e53f78bdf1e7984a8f3a5e42a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 5 Feb 2025 11:17:06 +0100 Subject: [PATCH 03/92] stuck on linalg.cholesky array API support --- sklearn/mixture/_gaussian_mixture.py | 32 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index a5b3a5ae5c172..79b2f74eb7f34 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,6 +7,7 @@ from scipy import linalg from ..utils import check_array +from ..utils._array_api import get_namespace from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -170,12 +171,14 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. """ + xp, _ = get_namespace(X) n_components, n_features = means.shape - covariances = np.empty((n_components, n_features, n_features), dtype=X.dtype) + covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) for k in range(n_components): diff = X - means[k] - covariances[k] = np.dot(resp[:, k] * diff.T, diff) / nk[k] - covariances[k].flat[:: n_features + 1] += reg_covar + covariances[k] = ((resp[:, k] * diff.T) @ diff) / nk[k] + my_flat = xp.reshape(covariances[k], (-1,)) + my_flat[:: n_features + 1] += reg_covar return covariances @@ -284,8 +287,9 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The covariance matrix of the current components. The shape depends of the covariance_type. 
""" - nk = resp.sum(axis=0) + 10 * np.finfo(resp.dtype).eps - means = np.dot(resp.T, X) / nk[:, np.newaxis] + xp, _ = get_namespace(X) + nk = resp.sum(axis=0) + 10 * xp.finfo(resp.dtype).eps + means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { "full": _estimate_gaussian_covariances_full, "tied": _estimate_gaussian_covariances_tied, @@ -313,6 +317,8 @@ def _compute_precision_cholesky(covariances, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. """ + xp, _ = get_namespace(covariances) + estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " "ill-defined empirical covariance (for instance caused by singleton " @@ -320,7 +326,7 @@ def _compute_precision_cholesky(covariances, covariance_type): "increase reg_covar, or scale the input data." ) dtype = covariances.dtype - if dtype == np.float32: + if dtype == xp.float32: estimate_precision_error_message += ( " The numerical accuracy can also be improved by passing float64" " data instead of float32." @@ -328,14 +334,14 @@ def _compute_precision_cholesky(covariances, covariance_type): if covariance_type == "full": n_components, n_features, _ = covariances.shape - precisions_chol = np.empty((n_components, n_features, n_features), dtype=dtype) + precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: cov_chol = linalg.cholesky(covariance, lower=True) except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) precisions_chol[k] = linalg.solve_triangular( - cov_chol, np.eye(n_features, dtype=dtype), lower=True + cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -344,12 +350,12 @@ def _compute_precision_cholesky(covariances, covariance_type): except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) precisions_chol = linalg.solve_triangular( - cov_chol, np.eye(n_features, dtype=dtype), lower=True + cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T else: - if np.any(np.less_equal(covariances, 0.0)): + if xp.any(xp.less_equal(covariances, 0.0)): raise ValueError(estimate_precision_error_message) - precisions_chol = 1.0 / np.sqrt(covariances) + precisions_chol = 1.0 / xp.sqrt(covariances) return precisions_chol @@ -759,7 +765,7 @@ def _check_parameters(self, X): n_features, ) - def _initialize_parameters(self, X, random_state): + def _initialize_parameters(self, X, random_state, xp): # If all the initial parameters are all provided, then there is no need to run # the initialization. 
compute_resp = ( @@ -768,7 +774,7 @@ def _initialize_parameters(self, X, random_state): or self.precisions_init is None ) if compute_resp: - super()._initialize_parameters(X, random_state) + super()._initialize_parameters(X, random_state, xp) else: self._initialize(X, None) From b1fdee7b0b098efc7684a60c98869d02ad7b9f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 5 Feb 2025 17:40:42 +0100 Subject: [PATCH 04/92] a bit further with xp.cholesky but now linalg.solve_triangular --- sklearn/mixture/_gaussian_mixture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 79b2f74eb7f34..8f4be903eec2c 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -337,10 +337,10 @@ def _compute_precision_cholesky(covariances, covariance_type): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: - cov_chol = linalg.cholesky(covariance, lower=True) - except linalg.LinAlgError: + cov_chol = xp.linalg.cholesky(covariance, upper=False) + except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k] = linalg.solve_triangular( + precisions_chol[k] = xp.linalg.solve_triangular( cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T elif covariance_type == "tied": From 14fb0bae31decb1ecc32aac38bd1df83b5f81d0a Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 14 Feb 2025 11:54:53 +0100 Subject: [PATCH 05/92] more array api --- gmm-array-api.py | 4 ++- sklearn/mixture/_gaussian_mixture.py | 45 +++++++++++++++------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index b87b4e819555a..916e9728faeda 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -11,7 +11,7 @@ os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -X, y = torch.asarray(X), torch.asarray(y) +X_torch, y_torch = torch.asarray(X), torch.asarray(y) sklearn.set_config(array_api_dispatch=True) @@ -56,3 +56,5 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) + +# %% diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8f4be903eec2c..53a764ea1f2fc 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -340,8 +340,8 @@ def _compute_precision_cholesky(covariances, covariance_type): cov_chol = xp.linalg.cholesky(covariance, upper=False) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k] = xp.linalg.solve_triangular( - cov_chol, xp.eye(n_features, dtype=dtype), lower=True + precisions_chol[k] = xp.linalg.solve( + cov_chol, xp.eye(n_features, dtype=dtype) ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -438,20 +438,21 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): log_det_precision_chol : array-like of shape (n_components,) The determinant of the precision matrix for each component. 
""" + xp, _ = get_namespace(matrix_chol) if covariance_type == "full": n_components, _, _ = matrix_chol.shape - log_det_chol = np.sum( - np.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 + log_det_chol = xp.sum( + xp.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 ) elif covariance_type == "tied": - log_det_chol = np.sum(np.log(np.diag(matrix_chol))) + log_det_chol = xp.sum(xp.log(xp.diagonal(matrix_chol))) elif covariance_type == "diag": - log_det_chol = np.sum(np.log(matrix_chol), axis=1) + log_det_chol = xp.sum(xp.log(matrix_chol), axis=1) else: - log_det_chol = n_features * np.log(matrix_chol) + log_det_chol = n_features * xp.log(matrix_chol) return log_det_chol @@ -478,6 +479,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ + xp, _ = get_namespace(X, means, precisions_chol) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -487,35 +489,38 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features) if covariance_type == "full": - log_prob = np.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): - y = np.dot(X, prec_chol) - np.dot(mu, prec_chol) - log_prob[:, k] = np.sum(np.square(y), axis=1) + y = (X @ prec_chol) - (mu @ prec_chol) + log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "tied": - log_prob = np.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) for k, mu in enumerate(means): - y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol) - log_prob[:, k] = np.sum(np.square(y), axis=1) + y = (X @ precisions_chol) - (mu @ precisions_chol) + log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "diag": precisions = precisions_chol**2 log_prob = ( - np.sum((means**2 * precisions), 1) - - 2.0 * np.dot(X, (means * precisions).T) - + np.dot(X**2, precisions.T) + xp.sum((means**2 * precisions), 1) + - 2.0 * (X @ (means * precisions).T) + + (X**2 @ precisions.T) ) elif covariance_type == "spherical": precisions = precisions_chol**2 log_prob = ( - np.sum(means**2, 1) * precisions - - 2 * np.dot(X, means.T * precisions) - + np.outer(row_norms(X, squared=True), precisions) + xp.sum(means**2, 1) * precisions + - 2 * (X @ means.T * precisions) + + xp.outer(row_norms(X, squared=True), precisions) ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return -0.5 * (n_features * np.log(2 * np.pi).astype(X.dtype) + log_prob) + log_det + return ( + -0.5 * (n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype)) + log_prob) + + log_det + ) class GaussianMixture(BaseMixture): From 6010ff79a1bfa0092b2ade04f9a6d039d4d8455b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 19 Feb 2025 11:42:05 +0100 Subject: [PATCH 06/92] wip (problem with weights as numpy arrays) --- examples/mixture/plot_gmm_covariances.py | 3 +++ gmm-array-api.py | 12 +++++++----- sklearn/mixture/_base.py | 11 +++++++---- sklearn/mixture/_gaussian_mixture.py | 16 ++++++++++------ 4 files changed, 27 insertions(+), 15 deletions(-) diff --git 
a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 91a26f518f332..5d752cc15dc9b 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -1,3 +1,4 @@ +# %% """ =============== GMM covariances @@ -136,3 +137,5 @@ def make_ellipses(gmm, ax): plt.show() + +# %% diff --git a/gmm-array-api.py b/gmm-array-api.py index 916e9728faeda..1541d44843c62 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -5,23 +5,25 @@ import torch import sklearn -from sklearn.datasets import make_blobs +from sklearn.datasets import make_blobs, load_iris from sklearn.mixture import GaussianMixture +import array_api_strict os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -X_torch, y_torch = torch.asarray(X), torch.asarray(y) +# X, y = torch.asarray(X), torch.asarray(y) +X, y = array_api_strict.asarray(X), array_api_strict.asarray(y) sklearn.set_config(array_api_dispatch=True) gmm = GaussianMixture( - n_components=3, covariance_type="full", random_state=0, init_params="random" + n_components=3, covariance_type="diag", random_state=0, init_params="random", + tol=1e-5, max_iter=1000 ).fit(X) print(gmm.means_) print(gmm.covariances_) -# %% import matplotlib as mpl import matplotlib.pyplot as plt @@ -57,4 +59,4 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) -# %% + # %% diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 444f25b0512ba..e4f5b51e3b891 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -123,7 +123,7 @@ def _initialize_parameters(self, X, random_state, xp): resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype ) - resp /= resp.sum(axis=1)[:, xp.newaxis] + resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) indices = random_state.choice( @@ -306,8 +306,9 @@ def _e_step(self, X): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ + xp, _ = get_namespace(X) log_prob_norm, log_resp = self._estimate_log_prob_resp(X) - return np.mean(log_prob_norm), log_resp + return xp.mean(log_prob_norm), log_resp @abstractmethod def _m_step(self, X, log_resp): @@ -403,8 +404,9 @@ def predict_proba(self, X): """ check_is_fitted(self) X = validate_data(self, X, reset=False) + xp, _ = get_namespace(X) _, log_resp = self._estimate_log_prob_resp(X) - return np.exp(log_resp) + return xp.exp(log_resp) def sample(self, n_samples=1): """Generate random samples from the fitted Gaussian distribution. 
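The hunks in this patch all apply the same porting recipe: module-level `np.*` calls are swapped for calls on the `xp` namespace object that `get_namespace` infers from the input arrays, so one code path serves NumPy arrays, PyTorch tensors, and `array_api_strict` arrays alike. A minimal standalone sketch of that recipe, using `array_api_compat` directly instead of scikit-learn's `get_namespace` wrapper (the helper function below is illustrative, not part of the patch):

```python
import array_api_compat


def weighted_column_mean(X, w):
    # Fetch the array API namespace shared by the inputs: the wrapped numpy
    # module for ndarrays, the torch wrapper for tensors, and so on.
    xp = array_api_compat.array_namespace(X, w)
    # From here on, use `xp` wherever `np` would normally appear.
    return xp.sum(X * w[:, None], axis=0) / xp.sum(w)
```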
@@ -526,11 +528,12 @@ def _estimate_log_prob_resp(self, X): log_responsibilities : array, shape (n_samples, n_components) logarithm of the responsibilities """ + xp, _ = get_namespace(X) weighted_log_prob = self._estimate_weighted_log_prob(X) log_prob_norm = logsumexp(weighted_log_prob, axis=1) with np.errstate(under="ignore"): # ignore underflow - log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis] + log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis] return log_prob_norm, log_resp def _print_verbose_msg_init_beg(self, n_init): diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 53a764ea1f2fc..2820a954f35b3 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -230,7 +230,8 @@ def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features) The covariance vector of the current components. """ - avg_X2 = np.dot(resp.T, X * X) / nk[:, np.newaxis] + xp, _ = get_namespace(X) + avg_X2 = (resp.T @ (X * X)) / nk[:, xp.newaxis] avg_means2 = means**2 return avg_X2 - avg_means2 + reg_covar @@ -288,7 +289,7 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The shape depends of the covariance_type. """ xp, _ = get_namespace(X) - nk = resp.sum(axis=0) + 10 * xp.finfo(resp.dtype).eps + nk = xp.sum(resp, axis=0) + 10 * xp.finfo(resp.dtype).eps means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { "full": _estimate_gaussian_covariances_full, @@ -353,7 +354,7 @@ def _compute_precision_cholesky(covariances, covariance_type): cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T else: - if xp.any(xp.less_equal(covariances, 0.0)): + if xp.any(covariances <= 0.0): raise ValueError(estimate_precision_error_message) precisions_chol = 1.0 / xp.sqrt(covariances) return precisions_chol @@ -503,7 +504,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): elif covariance_type == "diag": precisions = precisions_chol**2 log_prob = ( - xp.sum((means**2 * precisions), 1) + xp.sum((means**2 * precisions), axis=1) - 2.0 * (X @ (means * precisions).T) + (X**2 @ precisions.T) ) @@ -814,6 +815,7 @@ def _initialize(self, X, resp): self.precisions_init, self.covariance_type ) + def _m_step(self, X, log_resp): """M step. @@ -825,8 +827,9 @@ def _m_step(self, X, log_resp): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. 
""" + xp, _ = get_namespace(X, log_resp) self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( - X, np.exp(log_resp), self.reg_covar, self.covariance_type + X, xp.exp(log_resp), self.reg_covar, self.covariance_type ) self.weights_ /= self.weights_.sum() self.precisions_cholesky_ = _compute_precision_cholesky( @@ -839,7 +842,8 @@ def _estimate_log_prob(self, X): ) def _estimate_log_weights(self): - return np.log(self.weights_) + xp, _ = get_namespace(self.weights_) + return xp.log(self.weights_) def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm From aa2a3831f3a1fe9d4fc41879ee47727e1c1a3996 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 21 Feb 2025 14:57:27 +0100 Subject: [PATCH 07/92] array api for covariance_type='diag' and init_params='random' --- gmm-array-api.py | 19 +++++++++++++------ sklearn/mixture/_base.py | 2 +- sklearn/mixture/_gaussian_mixture.py | 3 +-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index 1541d44843c62..d96b6a14bd497 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,13 +1,12 @@ # %% import os +import array_api_strict import numpy as np -import torch import sklearn -from sklearn.datasets import make_blobs, load_iris +from sklearn.datasets import make_blobs from sklearn.mixture import GaussianMixture -import array_api_strict os.environ["SCIPY_ARRAY_API"] = "1" @@ -18,8 +17,12 @@ sklearn.set_config(array_api_dispatch=True) gmm = GaussianMixture( - n_components=3, covariance_type="diag", random_state=0, init_params="random", - tol=1e-5, max_iter=1000 + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + tol=1e-5, + max_iter=1000, ).fit(X) print(gmm.means_) print(gmm.covariances_) @@ -29,10 +32,14 @@ fig, ax = plt.subplots() +X = np.asarray(X) +y = np.asarray(y) + ax.scatter(X[:, 0], X[:, 1], c=y) def make_ellipses(gmm, ax): + gmm.covariances_ = np.asarray(gmm.covariances_) colors = ["navy", "turquoise", "darkorange"] for n, color in enumerate(colors): if gmm.covariance_type == "full": @@ -59,4 +66,4 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) - # %% +# %% diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index e4f5b51e3b891..f24db803a29ab 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -288,7 +288,7 @@ def fit_predict(self, X, y=None): # for any value of max_iter and tol (and any random_state). _, log_resp = self._e_step(X) - return log_resp.argmax(axis=1) + return xp.argmax(log_resp, axis=1) def _e_step(self, X): """E step. diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 2820a954f35b3..179a84aede43e 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -815,7 +815,6 @@ def _initialize(self, X, resp): self.precisions_init, self.covariance_type ) - def _m_step(self, X, log_resp): """M step. 
@@ -831,7 +830,7 @@ def _m_step(self, X, log_resp): self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( X, xp.exp(log_resp), self.reg_covar, self.covariance_type ) - self.weights_ /= self.weights_.sum() + self.weights_ /= xp.sum(self.weights_) self.precisions_cholesky_ = _compute_precision_cholesky( self.covariances_, self.covariance_type ) From de4f3a592646729873838b5e9ed414e482cb7ba8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 21 Feb 2025 15:19:19 +0100 Subject: [PATCH 08/92] add simple test --- .../mixture/tests/test_gaussian_mixture.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e8144ada64f67..5ef5573d4e372 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -17,6 +17,7 @@ from sklearn.cluster import KMeans from sklearn.covariance import EmpiricalCovariance from sklearn.datasets import make_spd_matrix +from sklearn.datasets._samples_generator import make_blobs from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture import GaussianMixture @@ -29,7 +30,9 @@ _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, ) +from sklearn.utils._array_api import yield_namespace_device_dtype_combinations from sklearn.utils._testing import ( + _array_api_for_tests, assert_allclose, assert_almost_equal, assert_array_almost_equal, @@ -1470,3 +1473,22 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( # The initial gaussian parameters are not estimated. They are estimated for every # m_step. assert mock.call_count == gm.n_iter_ + + +@pytest.mark.parametrize( + "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): + X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) + xp = _array_api_for_tests(array_namespace, device) + X = xp.asarray(X, device=device) + y = xp.asarray(y, device=device) + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + tol=1e-5, + max_iter=1000, + ).fit(X) From 7974931d9c913ba1e5ab605cf0179ecc35ca6369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 15:28:15 +0100 Subject: [PATCH 09/92] Add comments about tricky bits --- sklearn/mixture/_gaussian_mixture.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 179a84aede43e..47ca220cfe7d2 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -338,9 +338,14 @@ def _compute_precision_cholesky(covariances, covariance_type): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, + # maybe separate branches for array API and numpy? cov_chol = xp.linalg.cholesky(covariance, upper=False) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) + + # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular + # probably separate branches for array API and numpy? 
precisions_chol[k] = xp.linalg.solve( cov_chol, xp.eye(n_features, dtype=dtype) ).T From 08e5f9b88cad823344b8c96125c2f353ab62a18b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 15:42:30 +0100 Subject: [PATCH 10/92] lint --- gmm-array-api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index d96b6a14bd497..86fddbeeb97b1 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,7 +1,12 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import os import array_api_strict +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np import sklearn @@ -27,9 +32,6 @@ print(gmm.means_) print(gmm.covariances_) -import matplotlib as mpl -import matplotlib.pyplot as plt - fig, ax = plt.subplots() X = np.asarray(X) From 0f525efa104cf50263daf25f627a857bb5d814f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 16:01:13 +0100 Subject: [PATCH 11/92] one more comment --- gmm-array-api.py | 1 + sklearn/mixture/_base.py | 1 + 2 files changed, 2 insertions(+) diff --git a/gmm-array-api.py b/gmm-array-api.py index 86fddbeeb97b1..f0da95a8aca9e 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause # %% + import os import array_api_strict diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index f24db803a29ab..4e4c906a054c7 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -530,6 +530,7 @@ def _estimate_log_prob_resp(self, X): """ xp, _ = get_namespace(X) weighted_log_prob = self._estimate_weighted_log_prob(X) + # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support log_prob_norm = logsumexp(weighted_log_prob, axis=1) with np.errstate(under="ignore"): # ignore underflow From 4801e2bc48466d316485a80278ddc4d9f588af01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 14:21:58 +0100 Subject: [PATCH 12/92] revert unwanted change --- examples/mixture/plot_gmm_covariances.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 5d752cc15dc9b..91a26f518f332 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -1,4 +1,3 @@ -# %% """ =============== GMM covariances @@ -137,5 +136,3 @@ def make_ellipses(gmm, ax): plt.show() - -# %% From de1343c72a1a6de908d9e8684e4ba8a3f7e30a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 14:41:35 +0100 Subject: [PATCH 13/92] fix test_bayesian_mixture --- sklearn/mixture/tests/test_bayesian_mixture.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index d17e6710ee5a7..04e6cda745865 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -12,6 +12,7 @@ from sklearn.mixture import BayesianGaussianMixture from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData +from sklearn.utils._array_api import get_namespace from sklearn.utils._testing import ( assert_almost_equal, assert_array_equal, @@ -259,6 +260,7 @@ def test_compare_covar_type(): rand_data = RandomData(rng, scale=7) X = 
rand_data.X["full"] n_components = rand_data.n_components + xp, _ = get_namespace(X) for prior_type in PRIOR_TYPE: # Computation of the full_covariance @@ -271,7 +273,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) full_covariances = ( bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis] ) @@ -286,7 +288,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(tied_covariance, np.mean(full_covariances, 0)) @@ -301,7 +303,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) diag_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis] assert_almost_equal( @@ -318,7 +320,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(spherical_covariances, np.mean(diag_covariances, 1)) From b05eca06826157df44260b7b8b774a27b2a38898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 15:22:59 +0100 Subject: [PATCH 14/92] Compare to numpy result in test --- .../mixture/tests/test_gaussian_mixture.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5ef5573d4e372..5a8c40f00e301 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1480,15 +1480,21 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( ) def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + ) + gmm.fit(X) + means_ref = gmm.means_ + covariances_ref = gmm.covariances_ + xp = _array_api_for_tests(array_namespace, device) X = xp.asarray(X, device=device) y = xp.asarray(y, device=device) with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=0, - init_params="random", - tol=1e-5, - max_iter=1000, - ).fit(X) + gmm.fit(X) + + assert_allclose(means_ref, gmm.means_) + assert_allclose(covariances_ref, gmm.covariances_) From c35bdd6563798736f932833d6380c5271e43bc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 15:28:00 +0100 Subject: [PATCH 15/92] Use global_random_seed --- sklearn/mixture/tests/test_gaussian_mixture.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5a8c40f00e301..4d17fdc31d4a2 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1478,12 +1478,16 @@ def 
test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize( "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() ) -def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): - X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) +def test_gaussian_mixture_array_api_compliance( + array_namespace, device, dtype, global_random_seed +): + X, y = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) gmm = GaussianMixture( n_components=3, covariance_type="diag", - random_state=0, + random_state=global_random_seed, init_params="random", ) gmm.fit(X) From 4516920cf884425caad2075055b1e8646351d6e0 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:30:03 +0100 Subject: [PATCH 16/92] retrigger CI From e9740511b8e290447f20b39e43c079888a733098 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:30:39 +0100 Subject: [PATCH 17/92] retrigger CI From 1a7f262160c143e075401d165467ab341928fcb1 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:36:18 +0100 Subject: [PATCH 18/92] retrigger CI [azure parallel] From fb408708d32ef32104392bf837634bd41b97b64a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 13 Mar 2025 15:43:15 +0100 Subject: [PATCH 19/92] A bit further with setting the device more correctly --- sklearn/mixture/_base.py | 7 +- sklearn/mixture/_gaussian_mixture.py | 10 ++- .../mixture/tests/test_gaussian_mixture.py | 13 +++- sklearn/utils/_array_api.py | 68 +++++++++++++++---- 4 files changed, 77 insertions(+), 21 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 4e4c906a054c7..892602415a88a 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -16,7 +16,7 @@ from ..cluster import kmeans_plusplus from ..exceptions import ConvergenceWarning from ..utils import check_random_state -from ..utils._array_api import get_namespace +from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data @@ -120,8 +120,11 @@ def _initialize_parameters(self, X, random_state, xp): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": + xp, _, device = get_namespace_and_device(X) resp = xp.asarray( - random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype + random_state.uniform(size=(n_samples, self.n_components)), + dtype=X.dtype, + device=device, ) resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 47ca220cfe7d2..93705882d1486 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,7 +7,7 @@ from scipy import linalg from ..utils import check_array -from ..utils._array_api import get_namespace +from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -485,7 +485,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ - xp, _ = get_namespace(X, means, precisions_chol) + xp, _, device = get_namespace_and_device(X, means, precisions_chol) n_samples, n_features = 
X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -524,7 +524,11 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` return ( - -0.5 * (n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype)) + log_prob) + -0.5 + * ( + n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device)) + + log_prob + ) + log_det ) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 4d17fdc31d4a2..62d3c606124d4 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -30,7 +30,10 @@ _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, ) -from sklearn.utils._array_api import yield_namespace_device_dtype_combinations +from sklearn.utils._array_api import ( + _convert_to_numpy, + yield_namespace_device_dtype_combinations, +) from sklearn.utils._testing import ( _array_api_for_tests, assert_allclose, @@ -1500,5 +1503,9 @@ def test_gaussian_mixture_array_api_compliance( with sklearn.config_context(array_api_dispatch=True): gmm.fit(X) - assert_allclose(means_ref, gmm.means_) - assert_allclose(covariances_ref, gmm.covariances_) + # TODO is there an easy way to test device? device can be None or 'cpu' in + # the numpy case ... + # assert gmm.means_.device == device + # assert gmm.covariances_.device == device + assert_allclose(means_ref, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_ref, _convert_to_numpy(gmm.covariances_, xp=xp)) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 65503a0674a70..e65ebcce169b2 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -82,6 +82,19 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): ): yield array_namespace, device, dtype yield array_namespace, "mps", "float32" + + elif array_namespace == "array_api_strict": + try: + import array_api_strict # noqa + + yield array_namespace, array_api_strict.Device("CPU_DEVICE"), "float64" + yield array_namespace, array_api_strict.Device("device1"), "float32" + except ImportError: + # Those combinations will typically be skipped by pytest if + # array_api_strict is not installed but we still need to see them in + # the test output. + yield array_namespace, "CPU_DEVICE", "float64" + yield array_namespace, "device1", "float32" else: yield array_namespace, None, None @@ -582,12 +595,14 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): if namespace.__name__ == "array_api_strict" and hasattr( namespace, "set_array_api_strict_flags" ): - namespace.set_array_api_strict_flags(api_version="2023.12") + namespace.set_array_api_strict_flags(api_version="2024.12") return namespace, is_array_api_compliant -def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)): +def get_namespace_and_device( + *array_list, remove_none=True, remove_types=(str,), xp=None +): """Combination into one single function of `get_namespace` and `device`. Parameters @@ -598,6 +613,10 @@ def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)) Whether to ignore None objects passed in arrays. remove_types : tuple or list, default=(str,) Types to ignore in the arrays. + xp : module, default=None + Precomputed array namespace module. 
When passed, typically from a caller + that has already performed inspection of its own inputs, skips array + namespace inspection. Returns ------- @@ -610,16 +629,20 @@ def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)) device : device `device` object (see the "Device Support" section of the array API spec). """ + skip_remove_kwargs = dict(remove_none=False, remove_types=[]) + array_list = _remove_non_arrays( *array_list, remove_none=remove_none, remove_types=remove_types, ) + arrays_device = device(*array_list, **skip_remove_kwargs) - skip_remove_kwargs = dict(remove_none=False, remove_types=[]) + if xp is None: + xp, is_array_api = get_namespace(*array_list, **skip_remove_kwargs) + else: + xp, is_array_api = xp, True - xp, is_array_api = get_namespace(*array_list, **skip_remove_kwargs) - arrays_device = device(*array_list, **skip_remove_kwargs) if is_array_api: return xp, is_array_api, arrays_device else: @@ -769,49 +792,66 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None): return sum_ / scale +def _xlogy(x, y, xp=None): + # TODO: Remove this once https://github.com/scipy/scipy/issues/21736 is fixed + xp, _, device_ = get_namespace_and_device(x, y, xp=xp) + + with numpy.errstate(divide="ignore", invalid="ignore"): + temp = x * xp.log(y) + return xp.where(x == 0.0, xp.asarray(0.0, dtype=temp.dtype, device=device_), temp) + + def _nanmin(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmin(X, axis=axis)) else: mask = xp.isnan(X) - X = xp.min(xp.where(mask, xp.asarray(+xp.inf, device=device(X)), X), axis=axis) + X = xp.min( + xp.where(mask, xp.asarray(+xp.inf, dtype=X.dtype, device=device_), X), + axis=axis, + ) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask): - X = xp.where(mask, xp.asarray(xp.nan), X) + X = xp.where(mask, xp.asarray(xp.nan, dtype=X.dtype, device=device_), X) return X def _nanmax(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmax(X, axis=axis)) else: mask = xp.isnan(X) - X = xp.max(xp.where(mask, xp.asarray(-xp.inf, device=device(X)), X), axis=axis) + X = xp.max( + xp.where(mask, xp.asarray(-xp.inf, dtype=X.dtype, device=device_), X), + axis=axis, + ) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask): - X = xp.where(mask, xp.asarray(xp.nan), X) + X = xp.where(mask, xp.asarray(xp.nan, dtype=X.dtype, device=device_), X) return X def _nanmean(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmean(X, axis=axis)) else: mask = xp.isnan(X) - total = xp.sum(xp.where(mask, xp.asarray(0.0, device=device(X)), X), axis=axis) + total = xp.sum( + xp.where(mask, xp.asarray(0.0, dtype=X.dtype, device=device_), X), axis=axis + ) count = xp.sum(xp.astype(xp.logical_not(mask), X.dtype), axis=axis) return total / count @@ -868,6 +908,8 @@ def _convert_to_numpy(array, 
xp):
         return array.cpu().numpy()
     elif xp_name in {"array_api_compat.cupy", "cupy"}:  # pragma: nocover
         return array.get()
+    elif xp_name in {"array_api_strict"}:
+        return numpy.asarray(xp.asarray(array, device=xp.Device("CPU_DEVICE")))
 
     return numpy.asarray(array)
 

From f2eba56128d6de80217cde6d8a3422f1b0697126 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 16:51:03 +0100
Subject: [PATCH 20/92] Add our own implementation of logsumexp [azure parallel]

---
 sklearn/mixture/_base.py    | 7 +++----
 sklearn/utils/_array_api.py | 7 +++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 892602415a88a..9a364cd8c01ea 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -9,14 +9,13 @@
 from time import time
 
 import numpy as np
-from scipy.special import logsumexp
 
 from .. import cluster
 from ..base import BaseEstimator, DensityMixin, _fit_context
 from ..cluster import kmeans_plusplus
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
-from ..utils._array_api import get_namespace, get_namespace_and_device
+from ..utils._array_api import _logsumexp, get_namespace, get_namespace_and_device
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -352,7 +351,7 @@ def score_samples(self, X):
         check_is_fitted(self)
         X = validate_data(self, X, reset=False)
 
-        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)
+        return _logsumexp(self._estimate_weighted_log_prob(X), axis=1)
 
     def score(self, X, y=None):
         """Compute the per-sample average log-likelihood of the given data X.
@@ -534,7 +533,7 @@ def _estimate_log_prob_resp(self, X):
         xp, _ = get_namespace(X)
         weighted_log_prob = self._estimate_weighted_log_prob(X)
         # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support
-        log_prob_norm = logsumexp(weighted_log_prob, axis=1)
+        log_prob_norm = _logsumexp(weighted_log_prob, axis=1)
 
         with np.errstate(under="ignore"):  # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
         return log_prob_norm, log_resp
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index e65ebcce169b2..976044525c669 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1150,3 +1150,10 @@ def _tolist(array, xp=None):
         return array.tolist()
     array_np = _convert_to_numpy(array, xp=xp)
     return [element.item() for element in array_np]
+
+
+def _logsumexp(array, axis=None, xp=None):
+    # TODO replace by scipy.special.logsumexp when
+    # https://github.com/scipy/scipy/pull/22683 is in a release
+    xp, _ = get_namespace(array, xp=xp)
+    return xp.log(xp.sum(xp.exp(array), axis=axis))

From a0f8d2598b09569bd1d2ae581fbdfed4298585cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 22:42:56 +0100
Subject: [PATCH 21/92] Fix implementation of logsumexp

---
 sklearn/mixture/_base.py    |  2 +-
 sklearn/utils/_array_api.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 9a364cd8c01ea..6e032f560f256 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -532,8 +532,8 @@ def _estimate_log_prob_resp(self, X):
         """
         xp, _ = get_namespace(X)
         weighted_log_prob = self._estimate_weighted_log_prob(X)
-        # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support
         log_prob_norm = _logsumexp(weighted_log_prob, axis=1)
+
         with np.errstate(under="ignore"):  # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 976044525c669..9642b5591599a 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1155,5 +1155,32 @@ def _tolist(array, xp=None):
 def _logsumexp(array, axis=None, xp=None):
     # TODO replace by scipy.special.logsumexp when
     # https://github.com/scipy/scipy/pull/22683 is in a release
+    # The following code is strongly inspired and simplified from
+    # scipy.special._logsumexp.logsumexp
     xp, _ = get_namespace(array, xp=xp)
-    return xp.log(xp.sum(xp.exp(array), axis=axis))
+    axis = tuple(range(array.ndim)) if axis is None else axis
+
+    supported_dtypes = supported_float_dtypes(xp)
+    if array.dtype not in supported_dtypes:
+        array = xp.asarray(array, dtype=supported_dtypes[0])
+    array_max = xp.max(array, axis=axis, keepdims=True)
+    index_max = array == array_max
+
+    array = xp.asarray(array, copy=True)
+    array[index_max] = -xp.inf
+    i_max_dt = xp.astype(index_max, array.dtype)
+    m = xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=array.dtype)
+    # device=a_max.device is needed to avoid https://github.com/scipy/scipy/issues/22680
+    shift = xp.where(
+        xp.isfinite(array_max),
+        array_max,
+        xp.asarray(0, dtype=array_max.dtype, device=array_max.device),
+    )
+    exp = xp.exp(array - shift)
+    s = xp.sum(exp, axis=axis, keepdims=True, dtype=exp.dtype)
+    s = xp.where(s == 0, s, s / m)
+    out = xp.log1p(s) + xp.log(m) + array_max
+    out = xp.squeeze(out, axis=axis)
+    out = out[()] if out.ndim == 0 else out
+
+    return out

From 53e9917600af92b3decf1fe4dbbb5ff117bc71ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 23:29:22 +0100
Subject: [PATCH 22/92] Fix for older numpy versions

---
 sklearn/mixture/_gaussian_mixture.py | 2 +-
 sklearn/utils/_array_api.py          | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 93705882d1486..73d2dcfeaeae3 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -340,7 +340,7 @@ def _compute_precision_cholesky(covariances, covariance_type):
             try:
                 # TODO we are using xp.linalg instead of scipy.linalg.cholesky,
                 # maybe separate branches for array API and numpy?
-                cov_chol = xp.linalg.cholesky(covariance, upper=False)
+                cov_chol = xp.linalg.cholesky(covariance)
             except xp.linalg.LinAlgError:
                 raise ValueError(estimate_precision_error_message)
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 9642b5591599a..ccfee8598fe12 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1157,7 +1157,7 @@ def _logsumexp(array, axis=None, xp=None):
     # https://github.com/scipy/scipy/pull/22683 is in a release
     # The following code is strongly inspired and simplified from
     # scipy.special._logsumexp.logsumexp
-    xp, _ = get_namespace(array, xp=xp)
+    xp, _, device = get_namespace_and_device(array, xp=xp)
     axis = tuple(range(array.ndim)) if axis is None else axis
 
@@ -1170,11 +1170,11 @@ def _logsumexp(array, axis=None, xp=None):
     array[index_max] = -xp.inf
     i_max_dt = xp.astype(index_max, array.dtype)
     m = xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=array.dtype)
-    # device=a_max.device is needed to avoid https://github.com/scipy/scipy/issues/22680
+    # Specifying device explicitly is the fix for https://github.com/scipy/scipy/issues/22680
     shift = xp.where(
         xp.isfinite(array_max),
         array_max,
-        xp.asarray(0, dtype=array_max.dtype, device=array_max.device),
+        xp.asarray(0, dtype=array_max.dtype, device=device),
     )
     exp = xp.exp(array - shift)
     s = xp.sum(exp, axis=axis, keepdims=True, dtype=exp.dtype)

From ac66a02af6de0f894544773024b50b045b722557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Sat, 15 Mar 2025 08:58:36 +0100
Subject: [PATCH 23/92] [azure parallel] Add changelog template

---
 doc/whats_new/upcoming_changes/array-api/30777.feature.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/array-api/30777.feature.rst

diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst
new file mode 100644
index 0000000000000..6afe4e345e191
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst
@@ -0,0 +1,5 @@
+- :class:`sklearn.gaussian_mixture.GaussianMixture` with
+  `initialization="random"` (TODO double-check which parameters actually are
+  supported, for example kmeans-based initialization will not work for now) now
+  support Array API compatible inputs.
+ By :user:`Stefanie Senger ` and :user:`Loïc Estève ` From dfa92d9d073096bacd1899f9ac4990d40c51f689 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 18 Mar 2025 17:02:57 +0100 Subject: [PATCH 24/92] Remove "# noqa" inline comment --- sklearn/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 4c9f8071b6158..652fc8c01db8d 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -85,7 +85,7 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): elif array_namespace == "array_api_strict": try: - import array_api_strict # noqa + import array_api_strict yield array_namespace, array_api_strict.Device("CPU_DEVICE"), "float64" yield array_namespace, array_api_strict.Device("device1"), "float32" From 5f440a9762d0e8f5b5042cbdbfa3b156cd28dcc1 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 11:38:21 +0100 Subject: [PATCH 25/92] add test for _logsumexp --- sklearn/utils/tests/test_array_api.py | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 40548406d84f2..9b199711fb4ab 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -3,6 +3,7 @@ import numpy import pytest +import scipy from numpy.testing import assert_allclose from sklearn._config import config_context @@ -17,6 +18,7 @@ _fill_or_add_to_diagonal, _is_numpy_namespace, _isin, + _logsumexp, _max_precision_float_dtype, _nanmax, _nanmean, @@ -635,3 +637,29 @@ def test_sparse_device(csr_container, dispatch): assert get_namespace_and_device(a, numpy.array([1]))[2] is None except ImportError: raise SkipTest("array_api_compat is not installed") + + +@pytest.mark.parametrize( + "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations() +) +@pytest.mark.parametrize("axis", [0, 1, None]) +def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, axis): + xp = _array_api_for_tests(array_namespace, device_) + array_np = numpy.asarray( + [ + [0, 3, 1000], + [2, -1, 1000], + [numpy.inf, 0, 0], + [numpy.nan, 8, -numpy.inf], + [4, 0, 5], + ], + dtype=dtype_name, + ) + array_xp = xp.asarray(array_np, device=device_) + + res_np = scipy.special.logsumexp(array_np, axis=axis) + + with config_context(array_api_dispatch=True): + res_xp = _logsumexp(array_xp, axis=axis) + res_xp = _convert_to_numpy(res_xp, xp) + assert_array_equal(res_np, res_xp) From dd59446743638c6f527445de63243003517b0714 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 16:49:13 +0100 Subject: [PATCH 26/92] slightly improve tests --- .../array-api/30777.feature.rst | 8 ++--- sklearn/mixture/_base.py | 2 +- .../mixture/tests/test_gaussian_mixture.py | 33 ++++++++++++------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 6afe4e345e191..096b851ea11e5 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,5 +1,3 @@ -- :class:`sklearn.gaussian_mixture.GaussianMixture` with - `initialization="random"` (TODO double-check which parameters actually are - supported, for example kmeans-based initialization will not work for now) now - support Array API compatible inputs. 
- By :user:`Stefanie Senger ` and :user:`Loïc Estève ` +- :class:`sklearn.gaussian_mixture.GaussianMixture` with `initialization="random"`, + `covariance_type="diag"` and `warm_start=False` now supports Array API compatible + inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 6e032f560f256..0995e06f4be99 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -119,7 +119,7 @@ def _initialize_parameters(self, X, random_state, xp): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - xp, _, device = get_namespace_and_device(X) + xp, _, device = get_namespace_and_device(X, xp=xp) resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype, diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ebd0b2ab818cc..d27d93adc25be 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1484,7 +1484,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( def test_gaussian_mixture_array_api_compliance( array_namespace, device, dtype, global_random_seed ): - X, y = make_blobs( + X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) gmm = GaussianMixture( @@ -1493,19 +1493,28 @@ def test_gaussian_mixture_array_api_compliance( random_state=global_random_seed, init_params="random", ) + + gmm_dispatch = copy.deepcopy(gmm) + gmm.fit(X) - means_ref = gmm.means_ - covariances_ref = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device) X = xp.asarray(X, device=device) - y = xp.asarray(y, device=device) - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - # TODO is there an easy way to test device? device can be None or 'cpu' in - # the numpy case ... 
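
For reference, the round trip this test exercises, sketched as a standalone script. This assumes the state of this branch plus torch and array-api-compat being installed; it is not runnable against released scikit-learn:

import sklearn
import torch
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

X_np, _ = make_blobs(n_samples=1000, n_features=2, centers=3, random_state=0)

# reference fit on NumPy, dispatch disabled
gmm_ref = GaussianMixture(
    n_components=3, covariance_type="diag", init_params="random", random_state=0
).fit(X_np)

# same configuration on torch tensors with array API dispatch enabled
with sklearn.config_context(array_api_dispatch=True):
    gmm_xp = GaussianMixture(
        n_components=3, covariance_type="diag", init_params="random", random_state=0
    ).fit(torch.asarray(X_np))

# fitted parameters stay in the input namespace (torch here)
print(type(gmm_xp.means_), gmm_xp.means_.device)
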
- # assert gmm.means_.device == device - # assert gmm.covariances_.device == device - assert_allclose(means_ref, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_ref, _convert_to_numpy(gmm.covariances_, xp=xp)) + with sklearn.config_context(array_api_dispatch=True): + gmm_dispatch.fit(X) + + if array_namespace == "numpy": + assert gmm_dispatch.means_.device in ["cpu", None] + assert gmm_dispatch.covariances_.device in ["cpu", None] + elif array_namespace == "torch": + assert gmm_dispatch.means_.device.type == device + assert gmm_dispatch.covariances_.device.type == device + else: + assert gmm_dispatch.means_.device == device + assert gmm_dispatch.covariances_.device == device + + assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) + assert_allclose( + gmm.covariances_, _convert_to_numpy(gmm_dispatch.covariances_, xp=xp) + ) From 9e93dfa22ef36c11df3633010e1131b588c301c7 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 17:01:46 +0100 Subject: [PATCH 27/92] improve device checking --- .../mixture/tests/test_gaussian_mixture.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index d27d93adc25be..eb938765a0425 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -32,6 +32,7 @@ ) from sklearn.utils._array_api import ( _convert_to_numpy, + device, yield_namespace_device_dtype_combinations, ) from sklearn.utils._testing import ( @@ -1479,10 +1480,10 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize( - "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - array_namespace, device, dtype, global_random_seed + array_namespace, device_, dtype, global_random_seed ): X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed @@ -1498,21 +1499,17 @@ def test_gaussian_mixture_array_api_compliance( gmm.fit(X) - xp = _array_api_for_tests(array_namespace, device) - X = xp.asarray(X, device=device) + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): gmm_dispatch.fit(X) - if array_namespace == "numpy": - assert gmm_dispatch.means_.device in ["cpu", None] - assert gmm_dispatch.covariances_.device in ["cpu", None] - elif array_namespace == "torch": - assert gmm_dispatch.means_.device.type == device - assert gmm_dispatch.covariances_.device.type == device - else: - assert gmm_dispatch.means_.device == device - assert gmm_dispatch.covariances_.device == device + assert ( + device(X) + == device(gmm_dispatch.means_) + == device(gmm_dispatch.covariances_) + ) assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) assert_allclose( From 76cf0fae5e6dbfef65edd73649af2176ab220967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 14:47:24 +0100 Subject: [PATCH 28/92] tweak --- .../mixture/tests/test_gaussian_mixture.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index eb938765a0425..24da5376331a3 100644 --- 
a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1495,23 +1495,18 @@ def test_gaussian_mixture_array_api_compliance( init_params="random", ) - gmm_dispatch = copy.deepcopy(gmm) - gmm.fit(X) + means_ = gmm.means_ + covariances_ = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device_) X = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): - gmm_dispatch.fit(X) + gmm.fit(X) - assert ( - device(X) - == device(gmm_dispatch.means_) - == device(gmm_dispatch.covariances_) - ) + assert device(X) == device(gmm.means_) + assert device(X) == device(gmm.covariances_) - assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) - assert_allclose( - gmm.covariances_, _convert_to_numpy(gmm_dispatch.covariances_, xp=xp) - ) + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 489c3e3a4b73ea803cefd9f791b2d5743846780a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 15:39:30 +0100 Subject: [PATCH 29/92] Pass xp along the call chain --- sklearn/mixture/_base.py | 32 ++++++------- sklearn/mixture/_bayesian_mixture.py | 6 +-- sklearn/mixture/_gaussian_mixture.py | 68 +++++++++++++++------------- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 0995e06f4be99..55ba77a6ce997 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -95,7 +95,7 @@ def _check_parameters(self, X): """ pass - def _initialize_parameters(self, X, random_state, xp): + def _initialize_parameters(self, X, random_state, xp=None): """Initialize the model parameters. Parameters @@ -249,8 +249,8 @@ def fit_predict(self, X, y=None): for n_iter in range(1, self.max_iter + 1): prev_lower_bound = lower_bound - log_prob_norm, log_resp = self._e_step(X) - self._m_step(X, log_resp) + log_prob_norm, log_resp = self._e_step(X, xp=xp) + self._m_step(X, log_resp, xp=xp) lower_bound = self._compute_lower_bound(log_resp, log_prob_norm) change = lower_bound - prev_lower_bound @@ -288,11 +288,11 @@ def fit_predict(self, X, y=None): # Always do a final e-step to guarantee that the labels returned by # fit_predict(X) are always consistent with fit(X).predict(X) # for any value of max_iter and tol (and any random_state). - _, log_resp = self._e_step(X) + _, log_resp = self._e_step(X, xp=xp) return xp.argmax(log_resp, axis=1) - def _e_step(self, X): + def _e_step(self, X, xp=None): """E step. Parameters @@ -308,8 +308,8 @@ def _e_step(self, X): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ - xp, _ = get_namespace(X) - log_prob_norm, log_resp = self._estimate_log_prob_resp(X) + xp, _ = get_namespace(X, xp=xp) + log_prob_norm, log_resp = self._estimate_log_prob_resp(X, xp=xp) return xp.mean(log_prob_norm), log_resp @abstractmethod @@ -407,7 +407,7 @@ def predict_proba(self, X): check_is_fitted(self) X = validate_data(self, X, reset=False) xp, _ = get_namespace(X) - _, log_resp = self._estimate_log_prob_resp(X) + _, log_resp = self._estimate_log_prob_resp(X, xp=xp) return xp.exp(log_resp) def sample(self, n_samples=1): @@ -472,7 +472,7 @@ def sample(self, n_samples=1): return (X, y) - def _estimate_weighted_log_prob(self, X): + def _estimate_weighted_log_prob(self, X, xp=None): """Estimate the weighted log-probabilities, log P(X | Z) + log weights. 
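
The pattern of this commit in isolation: resolve the namespace once at the public entry point and thread it through, so private helpers do not re-inspect their inputs on every call. A minimal sketch using only `get_namespace` (the function names are illustrative):

import numpy as np
from sklearn.utils._array_api import get_namespace

def public_entry(X, xp=None):
    # inspect X once; xp is the array namespace (numpy, torch, ...)
    xp, _ = get_namespace(X, xp=xp)
    return _helper(X, xp=xp)

def _helper(X, xp=None):
    # with xp passed in, this resolves without touching X again
    xp, _ = get_namespace(X, xp=xp)
    return xp.sum(X, axis=0)

print(public_entry(np.ones((2, 3))))  # [2. 2. 2.]
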
Parameters @@ -483,10 +483,10 @@ def _estimate_weighted_log_prob(self, X): ------- weighted_log_prob : array, shape (n_samples, n_component) """ - return self._estimate_log_prob(X) + self._estimate_log_weights() + return self._estimate_log_prob(X, xp=xp) + self._estimate_log_weights(xp=xp) @abstractmethod - def _estimate_log_weights(self): + def _estimate_log_weights(self, xp=None): """Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm. Returns @@ -496,7 +496,7 @@ def _estimate_log_weights(self): pass @abstractmethod - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): """Estimate the log-probabilities log P(X | Z). Compute the log-probabilities per each component for each sample. @@ -511,7 +511,7 @@ def _estimate_log_prob(self, X): """ pass - def _estimate_log_prob_resp(self, X): + def _estimate_log_prob_resp(self, X, xp=None): """Estimate log probabilities and responsibilities for each sample. Compute the log probabilities, weighted log probabilities per @@ -530,9 +530,9 @@ def _estimate_log_prob_resp(self, X): log_responsibilities : array, shape (n_samples, n_components) logarithm of the responsibilities """ - xp, _ = get_namespace(X) - weighted_log_prob = self._estimate_weighted_log_prob(X) - log_prob_norm = _logsumexp(weighted_log_prob, axis=1) + xp, _ = get_namespace(X, xp=xp) + weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp) + log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp) with np.errstate(under="ignore"): # ignore underflow diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 7de5cc844b098..babe54aeb7693 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -718,7 +718,7 @@ def _estimate_wishart_spherical(self, nk, xk, sk): # Contrary to the original bishop book, we normalize the covariances self.covariances_ /= self.degrees_of_freedom_ - def _m_step(self, X, log_resp): + def _m_step(self, X, log_resp, xp=None): """M step. Parameters @@ -738,7 +738,7 @@ def _m_step(self, X, log_resp): self._estimate_means(nk, xk) self._estimate_precisions(nk, xk, sk) - def _estimate_log_weights(self): + def _estimate_log_weights(self, xp=None): if self.weight_concentration_prior_type == "dirichlet_process": digamma_sum = digamma( self.weight_concentration_[0] + self.weight_concentration_[1] @@ -756,7 +756,7 @@ def _estimate_log_weights(self): np.sum(self.weight_concentration_) ) - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): _, n_features = X.shape # We remove `n_features * np.log(self.degrees_of_freedom_)` because # the precision matrix is normalized diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 4614c4072e3c1..0ba9db226864d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -151,7 +151,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): # Gaussian mixture parameters estimators (used by the M-Step) -def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): """Estimate the full covariance matrices. Parameters @@ -171,7 +171,7 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. 
""" - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) n_components, n_features = means.shape covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) for k in range(n_components): @@ -182,7 +182,7 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): return covariances -def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): """Estimate the tied covariance matrix. Parameters @@ -202,6 +202,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. """ + # TODO still using np here ... avg_X2 = np.dot(X.T, X) avg_means2 = np.dot(nk * means.T, means) covariance = avg_X2 - avg_means2 @@ -210,7 +211,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): return covariance -def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar, xp=None): """Estimate the diagonal covariance vectors. Parameters @@ -230,13 +231,13 @@ def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features) The covariance vector of the current components. """ - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) avg_X2 = (resp.T @ (X * X)) / nk[:, xp.newaxis] avg_means2 = means**2 return avg_X2 - avg_means2 + reg_covar -def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar, xp=None): """Estimate the spherical variance values. Parameters @@ -256,10 +257,12 @@ def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar): variances : array, shape (n_components,) The variance values of each components. """ - return _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar).mean(1) + return _estimate_gaussian_covariances_diag( + resp, X, nk, means, reg_covar, xp=xp + ).mean(1) -def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): +def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): """Estimate the Gaussian distribution parameters. Parameters @@ -288,7 +291,7 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The covariance matrix of the current components. The shape depends of the covariance_type. """ - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) nk = xp.sum(resp, axis=0) + 10 * xp.finfo(resp.dtype).eps means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { @@ -296,11 +299,11 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): "tied": _estimate_gaussian_covariances_tied, "diag": _estimate_gaussian_covariances_diag, "spherical": _estimate_gaussian_covariances_spherical, - }[covariance_type](resp, X, nk, means, reg_covar) + }[covariance_type](resp, X, nk, means, reg_covar, xp=xp) return nk, means, covariances -def _compute_precision_cholesky(covariances, covariance_type): +def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. Parameters @@ -318,7 +321,7 @@ def _compute_precision_cholesky(covariances, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. 
""" - xp, _ = get_namespace(covariances) + xp, _ = get_namespace(covariances, xp=xp) estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " @@ -370,7 +373,7 @@ def _flipudlr(array): return np.flipud(np.fliplr(array)) -def _compute_precision_cholesky_from_precisions(precisions, covariance_type): +def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp=None): r"""Compute the Cholesky decomposition of precisions using precisions themselves. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is @@ -404,6 +407,7 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends on the covariance_type. """ + # TODO still using np here ... if covariance_type == "full": precisions_cholesky = np.array( [ @@ -422,7 +426,7 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): ############################################################################### # Gaussian mixture probability estimators -def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): +def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None): """Compute the log-det of the cholesky decomposition of matrices. Parameters @@ -444,7 +448,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): log_det_precision_chol : array-like of shape (n_components,) The determinant of the precision matrix for each component. """ - xp, _ = get_namespace(matrix_chol) + xp, _ = get_namespace(matrix_chol, xp=xp) if covariance_type == "full": n_components, _, _ = matrix_chol.shape log_det_chol = xp.sum( @@ -463,7 +467,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): return log_det_chol -def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): +def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=None): """Estimate the log Gaussian probability. Parameters @@ -485,7 +489,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ - xp, _, device = get_namespace_and_device(X, means, precisions_chol) + xp, _, device = get_namespace_and_device(X, means, precisions_chol, xp=xp) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -786,7 +790,7 @@ def _check_parameters(self, X): n_features, ) - def _initialize_parameters(self, X, random_state, xp): + def _initialize_parameters(self, X, random_state, xp=None): # If all the initial parameters are all provided, then there is no need to run # the initialization. compute_resp = ( @@ -795,11 +799,11 @@ def _initialize_parameters(self, X, random_state, xp): or self.precisions_init is None ) if compute_resp: - super()._initialize_parameters(X, random_state, xp) + super()._initialize_parameters(X, random_state, xp=xp) else: - self._initialize(X, None) + self._initialize(X, None, xp=xp) - def _initialize(self, X, resp): + def _initialize(self, X, resp, xp=None): """Initialization of the Gaussian mixture parameters. 
Parameters @@ -812,7 +816,7 @@ def _initialize(self, X, resp): weights, means, covariances = None, None, None if resp is not None: weights, means, covariances = _estimate_gaussian_parameters( - X, resp, self.reg_covar, self.covariance_type + X, resp, self.reg_covar, self.covariance_type, xp=xp ) if self.weights_init is None: weights /= n_samples @@ -823,14 +827,14 @@ def _initialize(self, X, resp): if self.precisions_init is None: self.covariances_ = covariances self.precisions_cholesky_ = _compute_precision_cholesky( - covariances, self.covariance_type + covariances, self.covariance_type, xp=xp ) else: self.precisions_cholesky_ = _compute_precision_cholesky_from_precisions( - self.precisions_init, self.covariance_type + self.precisions_init, self.covariance_type, xp=xp ) - def _m_step(self, X, log_resp): + def _m_step(self, X, log_resp, xp=None): """M step. Parameters @@ -841,22 +845,22 @@ def _m_step(self, X, log_resp): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ - xp, _ = get_namespace(X, log_resp) + xp, _ = get_namespace(X, log_resp, xp=xp) self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( - X, xp.exp(log_resp), self.reg_covar, self.covariance_type + X, xp.exp(log_resp), self.reg_covar, self.covariance_type, xp=xp ) self.weights_ /= xp.sum(self.weights_) self.precisions_cholesky_ = _compute_precision_cholesky( - self.covariances_, self.covariance_type + self.covariances_, self.covariance_type, xp=xp ) - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): return _estimate_log_gaussian_prob( - X, self.means_, self.precisions_cholesky_, self.covariance_type + X, self.means_, self.precisions_cholesky_, self.covariance_type, xp=xp ) - def _estimate_log_weights(self): - xp, _ = get_namespace(self.weights_) + def _estimate_log_weights(self, xp=None): + xp, _ = get_namespace(self.weights_, xp=xp) return xp.log(self.weights_) def _compute_lower_bound(self, _, log_prob_norm): From 6dccb4702ce433d2c02bde969943c5b7c5b702ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 16:30:04 +0100 Subject: [PATCH 30/92] tweak --- sklearn/mixture/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 55ba77a6ce997..aca4c1f082929 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -237,7 +237,7 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_beg(init) if do_init: - self._initialize_parameters(X, random_state, xp) + self._initialize_parameters(X, random_state, xp=xp) lower_bound = -xp.inf if do_init else self.lower_bound_ From 30894cd579aa7c2175dc5d1f5aefa75521a1e032 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 26 Mar 2025 11:20:41 +0100 Subject: [PATCH 31/92] add NotImplementedError and test --- sklearn/mixture/_gaussian_mixture.py | 14 +++++++++++ .../mixture/tests/test_gaussian_mixture.py | 25 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 0ba9db226864d..0418504156db9 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -6,6 +6,7 @@ import numpy as np from scipy import linalg +from .._config import get_config from ..utils import check_array from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions @@ -790,6 +791,19 @@ def 
_check_parameters(self, X): n_features, ) + allowed_init_values = ["random", "random_from_data"] + if ( + get_config()["array_api_dispatch"] + and self.init_params not in allowed_init_values + ): + raise NotImplementedError( + f"Allowed `init_params` are {allowed_init_values} if " + f"'array_api_dispatch' is enabled. You passed " + f"init_params={self.init_params!r}, which are not implemented to work " + "with 'array_api_dispatch' enabled. Please disable " + f"'array_api_dispatch' to use init_params={self.init_params!r}." + ) + def _initialize_parameters(self, X, random_state, xp=None): # If all the initial parameters are all provided, then there is no need to run # the initialization. diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 24da5376331a3..283d51daeeb8e 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1510,3 +1510,28 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + + +# TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_raises_where_array_api_not_implemented( + init_params, array_namespace, device_, dtype +): + X, _ = make_blobs( + n_samples=int(1e3), + n_features=2, + centers=3, + ) + gmm = GaussianMixture( + n_components=3, covariance_type="diag", init_params=init_params + ) + + with sklearn.config_context(array_api_dispatch=True): + with pytest.raises( + NotImplementedError, + match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", + ): + gmm.fit(X) From ae06fe173578442a033ef2d4074156ac324911be Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 26 Mar 2025 11:55:17 +0100 Subject: [PATCH 32/92] add array api support for init_params='random_from_data' --- .../upcoming_changes/array-api/30777.feature.rst | 7 ++++--- sklearn/mixture/_base.py | 11 ++++++++--- sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 096b851ea11e5..ed985fcc77d29 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,3 +1,4 @@ -- :class:`sklearn.gaussian_mixture.GaussianMixture` with `initialization="random"`, - `covariance_type="diag"` and `warm_start=False` now supports Array API compatible - inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` +- :class:`sklearn.gaussian_mixture.GaussianMixture` with + `initialization="random/random_from_data"` and `covariance_type="diag"` and + `warm_start=False` now supports Array API compatible inputs. + By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index aca4c1f082929..8773b8d818ac3 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -106,6 +106,7 @@ def _initialize_parameters(self, X, random_state, xp=None): A random number generator instance that controls the random seed used for the method chosen to initialize the parameters. 
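
What the new guard means on the caller's side, sketched against this branch's behaviour and assuming array-api-compat is installed (on released scikit-learn the kmeans path simply runs):

import sklearn
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

X, _ = make_blobs(n_samples=100, centers=3, random_state=0)
gmm = GaussianMixture(n_components=3, init_params="kmeans")

with sklearn.config_context(array_api_dispatch=True):
    try:
        gmm.fit(X)
    except NotImplementedError as exc:
        # raised by _check_parameters while dispatch is enabled
        print(exc)
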
""" + xp, _, device = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape if self.init_params == "kmeans": @@ -119,7 +120,6 @@ def _initialize_parameters(self, X, random_state, xp=None): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - xp, _, device = get_namespace_and_device(X, xp=xp) resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype, @@ -127,11 +127,16 @@ def _initialize_parameters(self, X, random_state, xp=None): ) resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": - resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros( + (n_samples, self.n_components), dtype=X.dtype, device=device + ) indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - resp[indices, xp.arange(self.n_components)] = 1 + # TODO: instead of for-loop, find something more efficient; previous code: + # resp[indices, xp.arange(self.n_components)] = 1 + for count, index in enumerate(indices): + resp[index, count] = 1 elif self.init_params == "k-means++": resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 283d51daeeb8e..4da93cce21f37 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1479,11 +1479,12 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( assert mock.call_count == gm.n_iter_ +@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - array_namespace, device_, dtype, global_random_seed + init_params, array_namespace, device_, dtype, global_random_seed ): X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed @@ -1492,7 +1493,7 @@ def test_gaussian_mixture_array_api_compliance( n_components=3, covariance_type="diag", random_state=global_random_seed, - init_params="random", + init_params=init_params, ) gmm.fit(X) From 3f2d92832947f40ec9931a22c176ba15d569dd0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 26 Mar 2025 12:14:01 +0100 Subject: [PATCH 33/92] Fix? 
---
 sklearn/mixture/tests/test_gaussian_mixture.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index 4da93cce21f37..2316840d8f213 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -1521,6 +1521,8 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented(
     init_params, array_namespace, device_, dtype
 ):
+    # TODO skips tests if dependencies are not installed
+    _array_api_for_tests(array_namespace, device=None)
     X, _ = make_blobs(
         n_samples=int(1e3),
         n_features=2,

From 6be6aa234f970bef8b988ea31437fba701eb1910 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 10:46:05 +0100
Subject: [PATCH 34/92] Add a logsumexp test without nans or +inf

---
 sklearn/utils/tests/test_array_api.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 84850ed0832eb..14366941f6928 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -596,8 +596,8 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
         [
             [0, 3, 1000],
             [2, -1, 1000],
-            [numpy.inf, 0, 0],
-            [numpy.nan, 8, -numpy.inf],
+            [-10, 0, 0],
+            [-50, 8, -numpy.inf],
             [4, 0, 5],
         ],
         dtype=dtype_name,
@@ -610,3 +610,23 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
     res_xp = _convert_to_numpy(res_xp, xp)
     assert_array_equal(res_np, res_xp)
+
+    # Test with NaNs and np.inf
+    array_np_2 = numpy.asarray(
+        [
+            [0, numpy.nan, 1000],
+            [2, -1, 1000],
+            [numpy.inf, 0, 0],
+            [-50, 8, -numpy.inf],
+            [4, 0, 5],
+        ],
+        dtype=dtype_name,
+    )
+    array_xp_2 = xp.asarray(array_np_2, device=device_)
+
+    res_np_2 = scipy.special.logsumexp(array_np_2, axis=axis)
+
+    with config_context(array_api_dispatch=True):
+        res_xp_2 = _logsumexp(array_xp_2, axis=axis)
+    res_xp_2 = _convert_to_numpy(res_xp_2, xp)
+    assert_array_equal(res_np_2, res_xp_2)

From 805742b84a475121ee1c1eeaa947f04ce2e33ea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 10:46:35 +0100
Subject: [PATCH 35/92] tweak

---
 sklearn/utils/tests/test_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 14366941f6928..f732f54d411ec 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -611,7 +611,7 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
     res_xp = _convert_to_numpy(res_xp, xp)
     assert_array_equal(res_np, res_xp)

-    # Test with NaNs and np.inf
+    # Test with NaNs and +np.inf
     array_np_2 = numpy.asarray(
         [
             [0, numpy.nan, 1000],

From 90bf491f5f9cc91129bf9c3ef97214eab8bd0397 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 11:31:49 +0100
Subject: [PATCH 36/92] Add test for logsumexp on default device with array API dispatch disabled

---
 sklearn/utils/_array_api.py           |  1 +
 sklearn/utils/tests/test_array_api.py | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 8f0c492677c29..f3db2a7f6522a 100644
--- a/sklearn/utils/_array_api.py
+++
b/sklearn/utils/_array_api.py @@ -998,6 +998,7 @@ def _logsumexp(array, axis=None, xp=None): supported_dtypes = supported_float_dtypes(xp) if array.dtype not in supported_dtypes: array = xp.asarray(array, dtype=supported_dtypes[0]) + array_max = xp.max(array, axis=axis, keepdims=True) index_max = array == array_max diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index f732f54d411ec..66b47c18b55d5 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -606,10 +606,20 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax res_np = scipy.special.logsumexp(array_np, axis=axis) + rtol = 1e-6 if "float32" in str(dtype_name) else 1e-12 + + # if torch on CPU or array api strict on default device + # check that _logsumexp works when array API dispatch is disabled + # TODO is there a better way for this + if (array_namespace == "torch" and device_ == "cpu") or ( + array_namespace == "array_api_strict" and "CPU" in str(device_) + ): + assert_allclose(_logsumexp(array_xp, axis=axis), res_np, rtol=rtol) + with config_context(array_api_dispatch=True): res_xp = _logsumexp(array_xp, axis=axis) res_xp = _convert_to_numpy(res_xp, xp) - assert_array_equal(res_np, res_xp) + assert_allclose(res_np, res_xp, rtol=rtol) # Test with NaNs and +np.inf array_np_2 = numpy.asarray( @@ -629,4 +639,4 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax with config_context(array_api_dispatch=True): res_xp_2 = _logsumexp(array_xp_2, axis=axis) res_xp_2 = _convert_to_numpy(res_xp_2, xp) - assert_array_equal(res_np_2, res_xp_2) + assert_allclose(res_np_2, res_xp_2, rtol=rtol) From b07b1713dd3a7b86d677702447f5f12d3bc049f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 27 Mar 2025 11:45:33 +0100 Subject: [PATCH 37/92] Cleaner way to skip when array API dispatch is disabled --- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 2316840d8f213..3ea2e8d07c685 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -41,6 +41,7 @@ assert_almost_equal, assert_array_almost_equal, assert_array_equal, + skip_if_array_api_compat_not_configured, ) from sklearn.utils.extmath import fast_logdet @@ -1514,6 +1515,7 @@ def test_gaussian_mixture_array_api_compliance( # TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() @@ -1521,8 +1523,6 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype ): - # TODO skips tests if dependencies are not installed - _array_api_for_tests(array_namespace, device=None) X, _ = make_blobs( n_samples=int(1e3), n_features=2, From baf6982329937c0649cd30a0fc83a719b0f32b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 27 Mar 2025 11:45:58 +0100 Subject: [PATCH 38/92] [azure parallel] From 339c16bd9cf0d8213e25a064d6dc33440fbc8d80 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 2 Apr 2025 12:08:38 +0200 Subject: [PATCH 39/92] add support for 
weights_init --- sklearn/mixture/_base.py | 4 +- sklearn/mixture/_gaussian_mixture.py | 39 ++++++---- .../mixture/tests/test_gaussian_mixture.py | 71 ++++++++++++++++++- 3 files changed, 96 insertions(+), 18 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 5373500cb65c5..59e9e4240637b 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -31,7 +31,7 @@ def _check_shape(param, param_shape, name): name : str """ - param = np.array(param) + # param = xp.array(param) if param.shape != param_shape: raise ValueError( "The parameter '%s' should have the shape of %s, but got %s" @@ -226,7 +226,7 @@ def fit_predict(self, X, y=None): f"but got n_components = {self.n_components}, " f"n_samples = {X.shape[0]}" ) - self._check_parameters(X) + self._check_parameters(X, xp=xp) # if we enable warm_start, we will have a unique initialisation do_init = not (self.warm_start and hasattr(self, "converged_")) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 0157c766ecc19..4d19718955d00 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,6 +7,7 @@ from scipy import linalg from .._config import get_config +from ..externals import array_api_extra as xpx from ..utils import check_array from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions @@ -17,7 +18,7 @@ # Gaussian mixture shape checkers used by the GaussianMixture class -def _check_weights(weights, n_components): +def _check_weights(weights, n_components, xp=None): """Check the user provided 'weights'. Parameters @@ -32,23 +33,23 @@ def _check_weights(weights, n_components): ------- weights : array, shape (n_components,) """ - weights = check_array(weights, dtype=[np.float64, np.float32], ensure_2d=False) + weights = check_array(weights, dtype=[xp.float64, xp.float32], ensure_2d=False) _check_shape(weights, (n_components,), "weights") # check range - if any(np.less(weights, 0.0)) or any(np.greater(weights, 1.0)): + if any(xp.less(weights, 0.0)) or any(xp.greater(weights, 1.0)): raise ValueError( "The parameter 'weights' should be in the range " "[0, 1], but got max value %.5f, min value %.5f" - % (np.min(weights), np.max(weights)) + % (xp.min(weights), xp.max(weights)) ) # check normalization - atol = 1e-6 if weights.dtype == np.float32 else 1e-8 - if not np.allclose(np.abs(1.0 - np.sum(weights)), 0.0, atol=atol): + atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 + if not xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" - % np.sum(weights) + % xp.sum(weights) ) return weights @@ -342,14 +343,15 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, - # maybe separate branches for array API and numpy? + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe + # separate branches for array API and numpy? cov_chol = xp.linalg.cholesky(covariance) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? + # probably separate branches for array API and numpy? 
maybe + # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant precisions_chol[k] = xp.linalg.solve( cov_chol, xp.eye(n_features, dtype=dtype) ).T @@ -775,12 +777,14 @@ def __init__( self.means_init = means_init self.precisions_init = precisions_init - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check the Gaussian mixture parameters are well defined.""" _, n_features = X.shape if self.weights_init is not None: - self.weights_init = _check_weights(self.weights_init, self.n_components) + self.weights_init = _check_weights( + self.weights_init, self.n_components, xp=xp + ) if self.means_init is not None: self.means_init = _check_means( @@ -795,13 +799,13 @@ def _check_parameters(self, X): n_features, ) - allowed_init_values = ["random", "random_from_data"] + allowed_init_params = ["random", "random_from_data"] if ( get_config()["array_api_dispatch"] - and self.init_params not in allowed_init_values + and self.init_params not in allowed_init_params ): raise NotImplementedError( - f"Allowed `init_params` are {allowed_init_values} if " + f"Allowed `init_params` are {allowed_init_params} if " f"'array_api_dispatch' is enabled. You passed " f"init_params={self.init_params!r}, which are not implemented to work " "with 'array_api_dispatch' enabled. Please disable " @@ -830,6 +834,9 @@ def _initialize(self, X, resp, xp=None): resp : array-like of shape (n_samples, n_components) """ + # TODO: check if device_ should be computed in fit_predict and passed down the + # call chain + xp, _, device_ = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape weights, means, covariances = None, None, None if resp is not None: @@ -840,6 +847,8 @@ def _initialize(self, X, resp, xp=None): weights /= n_samples self.weights_ = weights if self.weights_init is None else self.weights_init + self.weights_ = xp.asarray(self.weights_, device=device_) + self.means_ = means if self.means_init is None else self.means_init if self.precisions_init is None: diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 361344e6d363f..6d8e886c7af4f 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -9,6 +9,7 @@ from io import StringIO from unittest.mock import Mock +import array_api_strict import numpy as np import pytest from scipy import linalg, stats @@ -1515,7 +1516,32 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) -# TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_with_weights_init( + array_namespace, device_, dtype, global_random_seed +): + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=xp.asarray([0.1, 0.4, 0.5]), + ) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + +# TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( @@ -1539,3 +1565,46 @@ def 
test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_different_namespaces( + array_namespace, device_, dtype, global_random_seed +): + """Test that array api works if `X` and `weights_init` come from different array + namespaces.""" + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + # check with weights_init being a numpy array + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=np.asarray([0.1, 0.4, 0.5]), + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + # check with weights_init being an array_api_strict array + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) From cbc8811f624a3d46ec88d7533329449627a4f1b8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 2 Apr 2025 15:00:55 +0200 Subject: [PATCH 40/92] fix signature and add assert to test --- sklearn/mixture/_base.py | 2 +- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 59e9e4240637b..ce71136b6dcb1 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -86,7 +86,7 @@ def __init__( self.verbose_interval = verbose_interval @abstractmethod - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check initial parameters of the derived class. 
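
`device` is scikit-learn's `sklearn.utils._array_api.device` helper, which collapses the earlier per-backend branches into a single comparable token. The assertion pattern used by these tests, sketched on plain NumPy inputs:

import numpy as np
from sklearn.utils._array_api import device

X = np.ones((4, 2))
w = np.full(3, 1.0 / 3.0)

# one namespace-agnostic token per array; both sides agree for NumPy,
# and for torch/array_api_strict it reflects the actual placement
assert device(X) == device(w)
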
Parameters diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6d8e886c7af4f..5309fe595fe2a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1522,6 +1522,8 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_with_weights_init( array_namespace, device_, dtype, global_random_seed ): + """Check that passing `weights_init` during instantiation correctly converts to the + same namespace as X.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1540,6 +1542,8 @@ def test_gaussian_mixture_array_api_with_weights_init( with sklearn.config_context(array_api_dispatch=True): gmm.fit(X) + assert device(X) == device(gmm.weights_) + # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured From 614f7b51936c22475c6c641c24e35a7c7eecfd76 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 3 Apr 2025 09:56:30 +0200 Subject: [PATCH 41/92] some small things --- .../array-api/30777.feature.rst | 2 +- .../mixture/tests/test_gaussian_mixture.py | 85 +++++++++---------- 2 files changed, 41 insertions(+), 46 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index ed985fcc77d29..84a1b16855c84 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `initialization="random/random_from_data"` and `covariance_type="diag"` and + `init_params` "random" or "random_from_data" and `covariance_type="diag"` and `warm_start=False` now supports Array API compatible inputs. 
By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5309fe595fe2a..ee8451c94dc1c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1522,8 +1522,8 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_with_weights_init( array_namespace, device_, dtype, global_random_seed ): - """Check that passing `weights_init` during instantiation correctly converts to the - same namespace as X.""" + """Check that array api works with `weights_init`, which unlike other passed arrays + is an init param.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1545,6 +1545,44 @@ def test_gaussian_mixture_array_api_with_weights_init( assert device(X) == device(gmm.weights_) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_different_namespaces( + array_namespace, device_, dtype, global_random_seed +): + """Check that passing `weights_init` in a different namespace during instantiation + correctly converts to the same namespace as X.""" + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + """# check with weights_init being a numpy array + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=np.asarray([0.1, 0.4, 0.5]), + ) + gmm.fit(X)""" + + # check with weights_init being an array_api_strict array + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), + ) + gmm.fit(X) + + # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @@ -1569,46 +1607,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_different_namespaces( - array_namespace, device_, dtype, global_random_seed -): - """Test that array api works if `X` and `weights_init` come from different array - namespaces.""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - - # check with weights_init being a numpy array - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=np.asarray([0.1, 0.4, 0.5]), - ) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - # check with weights_init being an array_api_strict array - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), - ) - - xp = 
_array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) From 90baf84c5f0886cc4a102bdf0d4b123b452c9c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 10:13:55 +0200 Subject: [PATCH 42/92] Fix BayesianGaussianMixture --- sklearn/mixture/_bayesian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 83e889984241b..6858e45e1972b 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -410,7 +410,7 @@ def __init__( self.degrees_of_freedom_prior = degrees_of_freedom_prior self.covariance_prior = covariance_prior - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check that the parameters are well defined. Parameters From 1e7a3856f68adef7cb2b6b3d531b4b3a705650bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 10:18:57 +0200 Subject: [PATCH 43/92] Add comment --- sklearn/mixture/_bayesian_mixture.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 6858e45e1972b..2a62f159b1df6 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -417,6 +417,9 @@ def _check_parameters(self, X, xp=None): ---------- X : array-like of shape (n_samples, n_features) """ + # TODO should we pass xp to the check functions in other words + # should we test BayesianGaussianMixture array API support? + # Maybe we should leave it for a further PR self._check_weights_parameters() self._check_means_parameters(X) self._check_precision_parameters(X) From e4618cff62b6586077e44e6b6ab3b9c149c4a52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 12:05:14 +0200 Subject: [PATCH 44/92] Remove all remaining code using np and make most tests pass --- sklearn/mixture/_base.py | 20 ++-- sklearn/mixture/_gaussian_mixture.py | 96 ++++++++------- .../mixture/tests/test_gaussian_mixture.py | 113 ++++++++++++------ 3 files changed, 143 insertions(+), 86 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index ce71136b6dcb1..05dca67346ae4 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -290,7 +290,7 @@ def fit_predict(self, X, y=None): ConvergenceWarning, ) - self._set_parameters(best_params) + self._set_parameters(best_params, xp=xp) self.n_iter_ = best_n_iter self.lower_bound_ = max_lower_bound self.lower_bounds_ = best_lower_bounds @@ -437,6 +437,9 @@ def sample(self, n_samples=1): Component labels. """ check_is_fitted(self) + # TODO what is a cleaner way to do this, should we have a self.xp_? 
+ # TODO we probably want to use the device as well + xp, _, device = get_namespace(self.means_) if n_samples < 1: raise ValueError( @@ -449,7 +452,7 @@ def sample(self, n_samples=1): n_samples_comp = rng.multinomial(n_samples, self.weights_) if self.covariance_type == "full": - X = np.vstack( + X = xp.concat( [ rng.multivariate_normal(mean, covariance, int(sample)) for (mean, covariance, sample) in zip( @@ -458,26 +461,26 @@ def sample(self, n_samples=1): ] ) elif self.covariance_type == "tied": - X = np.vstack( + X = xp.concat( [ rng.multivariate_normal(mean, self.covariances_, int(sample)) for (mean, sample) in zip(self.means_, n_samples_comp) ] ) else: - X = np.vstack( + X = xp.concat( [ mean + rng.standard_normal(size=(sample, n_features)) - * np.sqrt(covariance) + * xp.sqrt(covariance) for (mean, covariance, sample) in zip( self.means_, self.covariances_, n_samples_comp ) ] ) - y = np.concatenate( - [np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] + y = xp.concat( + [xp.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] ) return (X, y) @@ -544,6 +547,9 @@ def _estimate_log_prob_resp(self, X, xp=None): weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp) log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp) + # TODO np.errstate not in the array API spec, decide what to do here + # maybe something like this + # context_manager = np.errstate(under="ignore") if xp is np else nullcontext with np.errstate(under="ignore"): # ignore underflow log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis] diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 4d19718955d00..1e89bc5f3dbab 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -2,8 +2,8 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause +import math -import numpy as np from scipy import linalg from .._config import get_config @@ -54,7 +54,7 @@ def _check_weights(weights, n_components, xp=None): return weights -def _check_means(means, n_components, n_features): +def _check_means(means, n_components, n_features, xp=None): """Validate the provided 'means'. 
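
Two loose ends in the hunks above: `xp, _, device = get_namespace(self.means_)` unpacks three values from a helper that is used as a two-tuple everywhere else in this series, so `get_namespace_and_device` is presumably intended there; and the `np.errstate` TODO. The TODO's own suggestion, spelled out as a runnable sketch (the name `underflow_guard` is made up):

import contextlib
import numpy as np

def underflow_guard(xp):
    # silence underflow only when the namespace really is NumPy;
    # any other namespace gets a no-op context manager
    return np.errstate(under="ignore") if xp is np else contextlib.nullcontext()

with underflow_guard(np):
    print(np.exp(np.asarray(-1000.0)))  # 0.0, without an underflow warning
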
Parameters @@ -72,34 +72,39 @@ def _check_means(means, n_components, n_features): ------- means : array, (n_components, n_features) """ - means = check_array(means, dtype=[np.float64, np.float32], ensure_2d=False) + xp, _ = get_namespace(means, xp=xp) + means = check_array(means, dtype=[xp.float64, xp.float32], ensure_2d=False) _check_shape(means, (n_components, n_features), "means") return means -def _check_precision_positivity(precision, covariance_type): +def _check_precision_positivity(precision, covariance_type, xp=None): """Check a precision vector is positive-definite.""" - if np.any(np.less_equal(precision, 0.0)): + xp, _ = get_namespace(precision, xp=xp) + if xp.any(xp.less_equal(precision, 0.0)): raise ValueError("'%s precision' should be positive" % covariance_type) -def _check_precision_matrix(precision, covariance_type): +def _check_precision_matrix(precision, covariance_type, xp=None): """Check a precision matrix is symmetric and positive-definite.""" + xp, _ = get_namespace(precision, xp=xp) if not ( - np.allclose(precision, precision.T) and np.all(linalg.eigvalsh(precision) > 0.0) + xpx.isclose(precision, precision.T) + and xp.all(xp.linalg.eigvalsh(precision) > 0.0) ): raise ValueError( "'%s precision' should be symmetric, positive-definite" % covariance_type ) -def _check_precisions_full(precisions, covariance_type): +def _check_precisions_full(precisions, covariance_type, xp=None): """Check the precision matrices are symmetric and positive-definite.""" + xp, _ = get_namespace(precisions, xp=xp) for prec in precisions: - _check_precision_matrix(prec, covariance_type) + _check_precision_matrix(prec, covariance_type, xp=xp) -def _check_precisions(precisions, covariance_type, n_components, n_features): +def _check_precisions(precisions, covariance_type, n_components, n_features, xp=None): """Validate user provided precisions. Parameters @@ -122,9 +127,10 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): ------- precisions : array """ + xp, _ = get_namespace(precisions, xp=xp) precisions = check_array( precisions, - dtype=[np.float64, np.float32], + dtype=[xp.float64, xp.float32], ensure_2d=False, allow_nd=covariance_type == "full", ) @@ -145,7 +151,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): "diag": _check_precision_positivity, "spherical": _check_precision_positivity, } - _check_precisions[covariance_type](precisions, covariance_type) + _check_precisions[covariance_type](precisions, covariance_type, xp=xp) return precisions @@ -204,12 +210,11 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. """ - # TODO still using np here ... - avg_X2 = np.dot(X.T, X) - avg_means2 = np.dot(nk * means.T, means) + avg_X2 = X.T @ X + avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 - covariance /= nk.sum() - covariance.flat[:: len(covariance) + 1] += reg_covar + covariance /= xp.sum(nk) + covariance[:, 0] += reg_covar return covariance @@ -323,7 +328,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. 
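
One detail of the `tied` rewrite above: the removed `covariance.flat[:: len(covariance) + 1] += reg_covar` adds `reg_covar` to the diagonal, while the replacement `covariance[:, 0] += reg_covar` updates the first column, so a diagonal-targeted update is presumably intended (for example the `_fill_or_add_to_diagonal` helper already imported in `test_array_api.py` earlier in the series, or an identity-matrix add). The identity-matrix version, checked in NumPy:

import numpy as np

n_features, reg_covar = 3, 1e-6
covariance = np.zeros((n_features, n_features))

# the .flat stride trick, and an array-API-friendly equivalent
expected = covariance.copy()
expected.flat[:: n_features + 1] += reg_covar

covariance = covariance + reg_covar * np.eye(n_features, dtype=covariance.dtype)
assert np.allclose(covariance, expected)
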
""" - xp, _ = get_namespace(covariances, xp=xp) + xp, _, device_ = get_namespace_and_device(covariances, xp=xp) estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " @@ -358,11 +363,16 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): elif covariance_type == "tied": _, n_features = covariances.shape try: - cov_chol = linalg.cholesky(covariances, lower=True) + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe + # separate branches for array API and numpy? + cov_chol = xp.linalg.cholesky(covariances) except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol = linalg.solve_triangular( - cov_chol, xp.eye(n_features, dtype=dtype), lower=True + # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular + # probably separate branches for array API and numpy? maybe + # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant + precisions_chol = xp.linalg.solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_) ).T else: if xp.any(covariances <= 0.0): @@ -371,9 +381,10 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): return precisions_chol -def _flipudlr(array): +def _flipudlr(array, xp=None): """Reverse the rows and columns of an array.""" - return np.flipud(np.fliplr(array)) + xp, _ = get_namespace(array, xp=xp) + return xp.flip(xp.flip(array, axis=1), axis=0) def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp=None): @@ -410,20 +421,19 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= The cholesky decomposition of sample precisions of the current components. The shape depends on the covariance_type. """ - # TODO still using np here ... 
if covariance_type == "full": - precisions_cholesky = np.array( + precisions_cholesky = xp.asarray( [ - _flipudlr(linalg.cholesky(_flipudlr(precision), lower=True)) + _flipudlr(xp.linalg.cholesky(_flipudlr(precision, xp=xp)), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - linalg.cholesky(_flipudlr(precisions), lower=True) + xp.linalg.cholesky(_flipudlr(precisions, xp=xp)), xp=xp ) else: - precisions_cholesky = np.sqrt(precisions) + precisions_cholesky = xp.sqrt(precisions) return precisions_cholesky @@ -459,7 +469,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None) ) elif covariance_type == "tied": - log_det_chol = xp.sum(xp.log(xp.diagonal(matrix_chol))) + log_det_chol = xp.sum(xp.log(xp.linalg.diagonal(matrix_chol))) elif covariance_type == "diag": log_det_chol = xp.sum(xp.log(matrix_chol), axis=1) @@ -492,7 +502,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ------- log_prob : array, shape (n_samples, n_components) """ - xp, _, device = get_namespace_and_device(X, means, precisions_chol, xp=xp) + xp, _, device_ = get_namespace_and_device(X, means, precisions_chol, xp=xp) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -502,14 +512,15 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features) if covariance_type == "full": - log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): y = (X @ prec_chol) - (mu @ prec_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "tied": - log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) - for k, mu in enumerate(means): + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) + for k in range(means.shape[0]): + mu = means[k, :] y = (X @ precisions_chol) - (mu @ precisions_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) @@ -533,7 +544,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N return ( -0.5 * ( - n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device)) + n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device_)) + log_prob ) + log_det @@ -788,7 +799,7 @@ def _check_parameters(self, X, xp=None): if self.means_init is not None: self.means_init = _check_means( - self.means_init, self.n_components, n_features + self.means_init, self.n_components, n_features, xp=xp ) if self.precisions_init is not None: @@ -797,6 +808,7 @@ def _check_parameters(self, X, xp=None): self.covariance_type, self.n_components, n_features, + xp=xp, ) allowed_init_params = ["random", "random_from_data"] @@ -901,7 +913,8 @@ def _get_parameters(self): self.precisions_cholesky_, ) - def _set_parameters(self, params): + def _set_parameters(self, params, xp=None): + xp, _, device_ = get_namespace_and_device(params, xp=xp) ( self.weights_, self.means_, @@ -914,14 +927,13 @@ def _set_parameters(self, params): dtype = self.precisions_cholesky_.dtype if self.covariance_type == "full": - self.precisions_ = np.empty_like(self.precisions_cholesky_) + self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_) for k, prec_chol in enumerate(self.precisions_cholesky_): - 
self.precisions_[k] = np.dot(prec_chol, prec_chol.T) + self.precisions_[k] = prec_chol @ prec_chol.T elif self.covariance_type == "tied": - self.precisions_ = np.dot( - self.precisions_cholesky_, self.precisions_cholesky_.T - ) + self.precisions_ = self.precisions_cholesky_ @ self.precisions_cholesky_.T + else: self.precisions_ = self.precisions_cholesky_**2 @@ -958,7 +970,7 @@ def bic(self, X): bic : float The lower the better. """ - return -2 * self.score(X) * X.shape[0] + self._n_parameters() * np.log( + return -2 * self.score(X) * X.shape[0] + self._n_parameters() * math.log( X.shape[0] ) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ee8451c94dc1c..1226295335b65 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -9,7 +9,6 @@ from io import StringIO from unittest.mock import Mock -import array_api_strict import numpy as np import pytest from scipy import linalg, stats @@ -1515,6 +1514,9 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + # TODO Maybe we should test the sample method + # TODO test means_init and precisions_init + @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() @@ -1545,42 +1547,45 @@ def test_gaussian_mixture_array_api_with_weights_init( assert device(X) == device(gmm.weights_) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_different_namespaces( - array_namespace, device_, dtype, global_random_seed -): - """Check that passing `weights_init` in a different namespace during instantiation - correctly converts to the same namespace as X.""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - """# check with weights_init being a numpy array - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=np.asarray([0.1, 0.4, 0.5]), - ) - gmm.fit(X)""" - - # check with weights_init being an array_api_strict array - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), - ) - gmm.fit(X) +# TODO What is the expected behavior when weights init +# and X are not in the same namespace/device? +# It feels like check_array would need a xp argument? 
+# @pytest.mark.parametrize( +# "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +# ) +# def test_gaussian_mixture_array_api_different_namespaces( +# array_namespace, device_, dtype, global_random_seed +# ): +# """Check that passing `weights_init` in a different namespace during instantiation +# correctly converts to the same namespace as X.""" +# X, _ = make_blobs( +# n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed +# ) + +# xp = _array_api_for_tests(array_namespace, device_) +# X = xp.asarray(X, device=device_) + +# # check with weights_init being a numpy array +# with sklearn.config_context(array_api_dispatch=True): +# gmm = GaussianMixture( +# n_components=3, +# covariance_type="diag", +# random_state=global_random_seed, +# init_params="random", +# weights_init=np.asarray([0.1, 0.4, 0.5]), +# ) +# gmm.fit(X) + +# # check with weights_init being an array_api_strict array +# with sklearn.config_context(array_api_dispatch=True): +# gmm = GaussianMixture( +# n_components=3, +# covariance_type="diag", +# random_state=global_random_seed, +# init_params="random", +# weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), +# ) +# gmm.fit(X) # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @@ -1607,3 +1612,37 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_compliance_covariance_type_tied( + init_params, array_namespace, device_, dtype, global_random_seed +): + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + gmm = GaussianMixture( + n_components=3, + covariance_type="tied", + random_state=global_random_seed, + init_params=init_params, + ) + + gmm.fit(X) + means_ = gmm.means_ + covariances_ = gmm.covariances_ + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + assert device(X) == device(gmm.means_) + assert device(X) == device(gmm.covariances_) + + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 2b80ac921e6c5ed81a8ec5ea310a0c0c97c7912e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 14:03:07 +0200 Subject: [PATCH 45/92] Fix easy failures --- sklearn/mixture/_base.py | 2 +- sklearn/mixture/_bayesian_mixture.py | 2 +- sklearn/mixture/_gaussian_mixture.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 05dca67346ae4..43c883de32f64 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -439,7 +439,7 @@ def sample(self, n_samples=1): check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
# TODO we probably want to use the device as well - xp, _, device = get_namespace(self.means_) + xp, _ = get_namespace(self.means_) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 2a62f159b1df6..9a991f19d3d40 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -850,7 +850,7 @@ def _get_parameters(self): self.precisions_cholesky_, ) - def _set_parameters(self, params): + def _set_parameters(self, params, xp=None): ( self.weight_concentration_, self.mean_precision_, diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 1e89bc5f3dbab..35bbaadb55d42 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -46,7 +46,7 @@ def _check_weights(weights, n_components, xp=None): # check normalization atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 - if not xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp): + if not xp.all(xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp)): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" % xp.sum(weights) @@ -89,7 +89,7 @@ def _check_precision_matrix(precision, covariance_type, xp=None): """Check a precision matrix is symmetric and positive-definite.""" xp, _ = get_namespace(precision, xp=xp) if not ( - xpx.isclose(precision, precision.T) + xp.all(xpx.isclose(precision, precision.T)) and xp.all(xp.linalg.eigvalsh(precision) > 0.0) ): raise ValueError( @@ -210,6 +210,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. 
""" + xp, _ = get_namespace(X, means, xp=xp) avg_X2 = X.T @ X avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 From 3287a5006082a6473dc356d605f52a60327141bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 14:27:58 +0200 Subject: [PATCH 46/92] Fix [azure parallel] --- sklearn/mixture/_gaussian_mixture.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 35bbaadb55d42..5b4ca4e646385 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -215,7 +215,8 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 covariance /= xp.sum(nk) - covariance[:, 0] += reg_covar + my_flat = xp.reshape(covariance, (-1,)) + my_flat[:: covariance.shape[0] + 1] += reg_covar return covariance From fb72f790d2af544df80cf1022144c7b16817f772 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 4 Apr 2025 14:08:46 +0200 Subject: [PATCH 47/92] array api support for covariance type 'full' + test --- .../array-api/30777.feature.rst | 4 +- sklearn/mixture/_gaussian_mixture.py | 35 ++++++++++------ .../mixture/tests/test_gaussian_mixture.py | 42 +++---------------- 3 files changed, 29 insertions(+), 52 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 84a1b16855c84..b3f0751fa0a0d 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `init_params` "random" or "random_from_data" and `covariance_type="diag"` and - `warm_start=False` now supports Array API compatible inputs. + `init_params` `"random"` or `"random_from_data"` and `warm_start=False` now supports + Array API compatible inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 5b4ca4e646385..6a5194ce76f60 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -179,13 +179,15 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. """ - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) n_components, n_features = means.shape - covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) + covariances = xp.empty( + (n_components, n_features, n_features), device=device_, dtype=X.dtype + ) for k in range(n_components): - diff = X - means[k] - covariances[k] = ((resp[:, k] * diff.T) @ diff) / nk[k] - my_flat = xp.reshape(covariances[k], (-1,)) + diff = X - means[k, ...] + covariances[k, ...] 
= ((resp[:, k] * diff.T) @ diff) / nk[k] + my_flat = xp.reshape(covariances[k, ...], (-1,)) my_flat[:: n_features + 1] += reg_covar return covariances @@ -347,8 +349,11 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): if covariance_type == "full": n_components, n_features, _ = covariances.shape - precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) - for k, covariance in enumerate(covariances): + precisions_chol = xp.empty( + (n_components, n_features, n_features), device=device_, dtype=dtype + ) + for k in range(covariances.shape[0]): + covariance = covariances[k, ...] try: # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe # separate branches for array API and numpy? @@ -359,8 +364,8 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular # probably separate branches for array API and numpy? maybe # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol[k] = xp.linalg.solve( - cov_chol, xp.eye(n_features, dtype=dtype) + precisions_chol[k, ...] = xp.linalg.solve( + cov_chol, xp.eye(n_features, device=device_, dtype=dtype) ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -467,7 +472,8 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None) if covariance_type == "full": n_components, _, _ = matrix_chol.shape log_det_chol = xp.sum( - xp.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 + xp.log(xp.reshape(matrix_chol, (n_components, -1))[:, :: n_features + 1]), + axis=1, ) elif covariance_type == "tied": @@ -515,7 +521,9 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N if covariance_type == "full": log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) - for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): + for k in range(means.shape[0]): + mu = means[k, ...] + prec_chol = precisions_chol[k, ...] y = (X @ prec_chol) - (mu @ prec_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) @@ -930,8 +938,9 @@ def _set_parameters(self, params, xp=None): dtype = self.precisions_cholesky_.dtype if self.covariance_type == "full": self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_) - for k, prec_chol in enumerate(self.precisions_cholesky_): - self.precisions_[k] = prec_chol @ prec_chol.T + for k in range(self.precisions_cholesky_.shape[0]): + prec_chol = self.precisions_cholesky_[k, ...] + self.precisions_[k, ...] 
= prec_chol @ prec_chol.T elif self.covariance_type == "tied": self.precisions_ = self.precisions_cholesky_ @ self.precisions_cholesky_.T diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 1226295335b65..a222addbc0ace 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1482,18 +1482,20 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - init_params, array_namespace, device_, dtype, global_random_seed + init_params, covariance_type, array_namespace, device_, dtype, global_random_seed ): + """Test that array api works in GaussianMixtrue.fit.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) gmm = GaussianMixture( n_components=3, - covariance_type="diag", + covariance_type=covariance_type, random_state=global_random_seed, init_params=init_params, ) @@ -1521,7 +1523,7 @@ def test_gaussian_mixture_array_api_compliance( @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) -def test_gaussian_mixture_array_api_with_weights_init( +def test_gaussian_mixture_array_api_compliance_with_weights_init( array_namespace, device_, dtype, global_random_seed ): """Check that array api works with `weights_init`, which unlike other passed arrays @@ -1612,37 +1614,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_compliance_covariance_type_tied( - init_params, array_namespace, device_, dtype, global_random_seed -): - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - gmm = GaussianMixture( - n_components=3, - covariance_type="tied", - random_state=global_random_seed, - init_params=init_params, - ) - - gmm.fit(X) - means_ = gmm.means_ - covariances_ = gmm.covariances_ - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.means_) - assert device(X) == device(gmm.covariances_) - - assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 964199700266de5d9626e36622c2ac52b492b035 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 12:27:01 +0200 Subject: [PATCH 48/92] fix support for covariance_type='spherical' --- sklearn/mixture/_gaussian_mixture.py | 12 +++++++----- sklearn/mixture/tests/test_gaussian_mixture.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 6a5194ce76f60..116e5db61414d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -268,9 +268,11 @@ def _estimate_gaussian_covariances_spherical(resp, X, 
nk, means, reg_covar, xp=N variances : array, shape (n_components,) The variance values of each components. """ - return _estimate_gaussian_covariances_diag( - resp, X, nk, means, reg_covar, xp=xp - ).mean(1) + xp, _ = get_namespace(X) + return xp.mean( + _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar, xp=xp), + axis=1, + ) def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): @@ -545,9 +547,9 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N elif covariance_type == "spherical": precisions = precisions_chol**2 log_prob = ( - xp.sum(means**2, 1) * precisions + xp.sum(means**2, axis=1) * precisions - 2 * (X @ means.T * precisions) - + xp.outer(row_norms(X, squared=True), precisions) + + xp.linalg.outer(row_norms(X, squared=True), precisions) ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index a222addbc0ace..ecd2c3cd3cbf1 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1482,7 +1482,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) -@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag", "spherical"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) From 35a464409c7c24836cc33376875592e3d1c9186c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 14:09:13 +0200 Subject: [PATCH 49/92] add test for GaussianMixture.sample() --- sklearn/mixture/_base.py | 4 +-- .../mixture/tests/test_gaussian_mixture.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 43c883de32f64..16691cd1ea403 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -31,7 +31,6 @@ def _check_shape(param, param_shape, name): name : str """ - # param = xp.array(param) if param.shape != param_shape: raise ValueError( "The parameter '%s' should have the shape of %s, but got %s" @@ -438,8 +437,7 @@ def sample(self, n_samples=1): """ check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
- # TODO we probably want to use the device as well - xp, _ = get_namespace(self.means_) + xp, _, device_ = get_namespace_and_device(self.means_) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ecd2c3cd3cbf1..6f51b5242205a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -33,6 +33,7 @@ from sklearn.utils._array_api import ( _convert_to_numpy, device, + get_namespace, yield_namespace_device_dtype_combinations, ) from sklearn.utils._testing import ( @@ -1489,7 +1490,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( def test_gaussian_mixture_array_api_compliance( init_params, covariance_type, array_namespace, device_, dtype, global_random_seed ): - """Test that array api works in GaussianMixtrue.fit.""" + """Test that array api works in GaussianMixture.fit().""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1516,7 +1517,6 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) - # TODO Maybe we should test the sample method # TODO test means_init and precisions_init @@ -1614,3 +1614,34 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_sample_array_api_compliance( + covariance_type, array_namespace, device_, dtype, global_random_seed +): + """Test that array api works in GaussianMixture.sample().""" + xp = _array_api_for_tests(array_namespace, device_) + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type=covariance_type, + random_state=global_random_seed, + init_params="random", + ) + gmm.fit(X) + X_sample, y_sample = gmm.sample() + + assert get_namespace(X_sample)[0] == xp + assert get_namespace(y_sample)[0] == xp + + assert device(X_sample) == device(X) + assert device(y_sample) == device(X) From 502d3e680738b2e2084d212c57d2bc799219bba9 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 15:20:24 +0200 Subject: [PATCH 50/92] fix array api support in sample() with covariance_type='full' --- sklearn/mixture/_base.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 16691cd1ea403..09ff25329218d 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -452,10 +452,15 @@ def sample(self, n_samples=1): if self.covariance_type == "full": X = xp.concat( [ - rng.multivariate_normal(mean, covariance, int(sample)) - for (mean, covariance, sample) in zip( - self.means_, self.covariances_, n_samples_comp + xp.asarray( + rng.multivariate_normal( + self.means_[i, ...], + self.covariances_[i, ...], + int(n_samples_comp[i]), + ) ) + for i in range(len(n_samples_comp)) + if n_samples_comp[i] > 0 ] ) elif self.covariance_type == "tied": @@ -478,7 +483,10 @@ def sample(self, n_samples=1): ) y = 
xp.concat( - [xp.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] + [ + xp.full(sample, j, dtype=xp.int32) + for j, sample in enumerate(n_samples_comp) + ] ) return (X, y) From 148381dd6f99d4e466f8152ffdd3f195d4768fb4 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 15:35:03 +0200 Subject: [PATCH 51/92] fix array api support in sample() with other covariance_types for array_api_strict namespace --- sklearn/mixture/_base.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 09ff25329218d..3f95bf56d33d2 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -460,25 +460,30 @@ def sample(self, n_samples=1): ) ) for i in range(len(n_samples_comp)) - if n_samples_comp[i] > 0 ] ) elif self.covariance_type == "tied": X = xp.concat( [ - rng.multivariate_normal(mean, self.covariances_, int(sample)) - for (mean, sample) in zip(self.means_, n_samples_comp) + xp.asarray( + rng.multivariate_normal( + self.means_[i, ...], + self.covariances_, + int(n_samples_comp[i]), + ) + ) + for i in range(len(n_samples_comp)) ] ) else: X = xp.concat( [ - mean - + rng.standard_normal(size=(sample, n_features)) - * xp.sqrt(covariance) - for (mean, covariance, sample) in zip( - self.means_, self.covariances_, n_samples_comp + self.means_[i, ...] + + xp.asarray( + rng.standard_normal(size=(n_samples_comp[i, ...], n_features)) ) + * xp.sqrt(self.covariances_[i, ...]) + for i in range(len(n_samples_comp)) ] ) From d565cf90048d041a2f0d12da22d1ea8b7b465f59 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 16:06:08 +0200 Subject: [PATCH 52/92] fix torch dtype issue in xp.full --- sklearn/mixture/_base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 3f95bf56d33d2..5a871602db1f6 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -437,7 +437,7 @@ def sample(self, n_samples=1): """ check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
-        xp, _, device_ = get_namespace_and_device(self.means_)
+        xp, _ = get_namespace(self.means_)
 
         if n_samples < 1:
             raise ValueError(
@@ -487,11 +487,17 @@ def sample(self, n_samples=1):
             ]
         )
 
-        y = xp.concat(
+        """y = xp.concat(
             [
-                xp.full(sample, j, dtype=xp.int32)
+                xp.full(int(sample), j, dtype=xp.int32)
                 for j, sample in enumerate(n_samples_comp)
             ]
+        )"""
+        y = xp.concat(
+            [
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32)
+                for i in range(len(n_samples_comp))
+            ]
         )
 
         return (X, y)

From c836e8dc1801adc96a72336a505ad76b17fda2df Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 9 Apr 2025 11:23:25 +0200
Subject: [PATCH 53/92] use numpy for random generation in sample

---
 sklearn/mixture/_base.py | 65 ++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 5a871602db1f6..f15c0547e4ec4 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -15,7 +15,12 @@
 from ..cluster import kmeans_plusplus
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
-from ..utils._array_api import _logsumexp, get_namespace, get_namespace_and_device
+from ..utils._array_api import (
+    _convert_to_numpy,
+    _logsumexp,
+    get_namespace,
+    get_namespace_and_device,
+)
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -437,7 +442,7 @@ def sample(self, n_samples=1):
         """
         check_is_fitted(self)
         # TODO what is a cleaner way to do this, should we have a self.xp_?
-        xp, _ = get_namespace(self.means_)
+        xp, _, device_ = get_namespace_and_device(self.means_)
 
         if n_samples < 1:
             raise ValueError(
@@ -447,60 +452,54 @@ def sample(self, n_samples=1):
 
         _, n_features = self.means_.shape
         rng = check_random_state(self.random_state)
-        n_samples_comp = rng.multinomial(n_samples, self.weights_)
+        n_samples_comp = rng.multinomial(
+            n_samples, _convert_to_numpy(self.weights_, xp)
+        )
 
         if self.covariance_type == "full":
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    xp.asarray(
-                        rng.multivariate_normal(
-                            self.means_[i, ...],
-                            self.covariances_[i, ...],
-                            int(n_samples_comp[i]),
-                        )
+                    rng.multivariate_normal(mean, covariance, int(sample))
+                    for (mean, covariance, sample) in zip(
+                        _convert_to_numpy(self.means_, xp),
+                        _convert_to_numpy(self.covariances_, xp),
+                        n_samples_comp,
                     )
-                    for i in range(len(n_samples_comp))
                 ]
             )
         elif self.covariance_type == "tied":
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    xp.asarray(
-                        rng.multivariate_normal(
-                            self.means_[i, ...],
-                            self.covariances_,
-                            int(n_samples_comp[i]),
-                        )
+                    rng.multivariate_normal(
+                        mean, _convert_to_numpy(self.covariances_, xp), int(sample)
+                    )
+                    for (mean, sample) in zip(
+                        _convert_to_numpy(self.means_, xp), n_samples_comp
                     )
-                    for i in range(len(n_samples_comp))
                 ]
             )
         else:
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    self.means_[i, ...]
-                    + xp.asarray(
-                        rng.standard_normal(size=(n_samples_comp[i, ...], n_features))
+                    mean
+                    + rng.standard_normal(size=(sample, n_features))
+                    * np.sqrt(covariance)
+                    for (mean, covariance, sample) in zip(
+                        _convert_to_numpy(self.means_, xp),
+                        _convert_to_numpy(self.covariances_, xp),
+                        n_samples_comp,
                     )
-                    * xp.sqrt(self.covariances_[i, ...])
-                    for i in range(len(n_samples_comp))
                 ]
             )
 
-        """y = xp.concat(
-            [
-                xp.full(int(sample), j, dtype=xp.int32)
-                for j, sample in enumerate(n_samples_comp)
-            ]
-        )"""
         y = xp.concat(
             [
-                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32)
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32, device=device_)
                 for i in range(len(n_samples_comp))
             ]
         )
 
-        return (X, y)
+        return xp.asarray(X, device=device_), y

From 668c1b0c11013c5c216ccb04cc732c25db49845f Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 9 Apr 2025 12:00:34 +0200
Subject: [PATCH 54/92] remove old comment

---
 sklearn/mixture/_base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index f15c0547e4ec4..1aad065008252 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -441,7 +441,6 @@ def sample(self, n_samples=1):
             Component labels.
         """
         check_is_fitted(self)
-        # TODO what is a cleaner way to do this, should we have a self.xp_?
         xp, _, device_ = get_namespace_and_device(self.means_)
 
         if n_samples < 1:

From 7fef10aa29f2c78dfeea8c17146aa9a808a09810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Wed, 9 Apr 2025 14:33:54 +0200
Subject: [PATCH 55/92] Only use np.errstate for numpy namespace

---
 sklearn/mixture/_base.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 1aad065008252..05ea1c8c74306 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -5,6 +5,7 @@
 
 import warnings
 from abc import ABCMeta, abstractmethod
+from contextlib import nullcontext
 from numbers import Integral, Real
 from time import time
 
@@ -17,6 +18,7 @@
 from ..utils import check_random_state
 from ..utils._array_api import (
     _convert_to_numpy,
+    _is_numpy_namespace,
     _logsumexp,
     get_namespace,
     get_namespace_and_device,
@@ -562,10 +564,11 @@ def _estimate_log_prob_resp(self, X, xp=None):
         weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp)
         log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp)
 
-        # TODO np.errstate not in the array API spec, decide what to do here
-        # maybe something like this
-        # context_manager = np.errstate(under="ignore") if xp is np else nullcontext
-        with np.errstate(under="ignore"):
+        # There is no errstate equivalent for warning/error management in array API
+        context_manager = (
+            np.errstate(under="ignore") if _is_numpy_namespace(xp) else nullcontext()
+        )
+        with context_manager:
             # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
         return log_prob_norm, log_resp
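In isolation, the pattern the patch above settles on reduces to the minimal sketch below: suppress NumPy's underflow warnings only when the active namespace is NumPy, since the array API standard has no errstate equivalent. The `xp_is_numpy` flag stands in for scikit-learn's internal `_is_numpy_namespace` helper; this is an illustration under that assumption, not the patched code itself.

    from contextlib import nullcontext

    import numpy as np
    from scipy.special import logsumexp

    def log_responsibilities(weighted_log_prob, xp_is_numpy=True):
        # Only NumPy understands errstate; other namespaces get a no-op context.
        ctx = np.errstate(under="ignore") if xp_is_numpy else nullcontext()
        with ctx:
            log_prob_norm = logsumexp(weighted_log_prob, axis=1)
            return log_prob_norm, weighted_log_prob - log_prob_norm[:, None]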
From c9a355d868e9db1c4f31c8975e9d1730b074549b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Wed, 9 Apr 2025 15:22:40 +0200
Subject: [PATCH 56/92] Use int64 to be closer to previous code that was doing dtype=int

---
 sklearn/mixture/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 05ea1c8c74306..1ef1940b60a59 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -495,7 +495,7 @@ def sample(self, n_samples=1):
 
         y = xp.concat(
             [
-                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32, device=device_)
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int64, device=device_)
                 for i in range(len(n_samples_comp))
             ]
         )

From a7121815a7c2b5cf31a405861f327b66706fa7db Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:20:51 +0200
Subject: [PATCH 57/92] colons instead of ellipsis

---
 sklearn/mixture/_gaussian_mixture.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 116e5db61414d..eb9a8ebf54c45 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -185,9 +185,9 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None):
         (n_components, n_features, n_features), device=device_, dtype=X.dtype
     )
     for k in range(n_components):
-        diff = X - means[k, ...]
-        covariances[k, ...] = ((resp[:, k] * diff.T) @ diff) / nk[k]
-        my_flat = xp.reshape(covariances[k, ...], (-1,))
+        diff = X - means[k, :]
+        covariances[k, :, :] = ((resp[:, k] * diff.T) @ diff) / nk[k]
+        my_flat = xp.reshape(covariances[k, :, :], (-1,))
         my_flat[:: n_features + 1] += reg_covar
     return covariances
 
@@ -355,7 +355,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None):
             (n_components, n_features, n_features), device=device_, dtype=dtype
         )
         for k in range(covariances.shape[0]):
-            covariance = covariances[k, ...]
+            covariance = covariances[k, :, :]
             try:
                 # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe
                 # separate branches for array API and numpy?
@@ -366,7 +366,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None):
                 # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular
                 # probably separate branches for array API and numpy? maybe
                 # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant
-                precisions_chol[k, ...] = xp.linalg.solve(
+                precisions_chol[k, :, :] = xp.linalg.solve(
                     cov_chol, xp.eye(n_features, device=device_, dtype=dtype)
                 ).T
     elif covariance_type == "tied":
@@ -524,8 +524,8 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N
     if covariance_type == "full":
         log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_)
         for k in range(means.shape[0]):
-            mu = means[k, ...]
-            prec_chol = precisions_chol[k, ...]
+            mu = means[k, :]
+            prec_chol = precisions_chol[k, :, :]
             y = (X @ prec_chol) - (mu @ prec_chol)
             log_prob[:, k] = xp.sum(xp.square(y), axis=1)
 
@@ -941,8 +941,8 @@ def _set_parameters(self, params, xp=None):
         if self.covariance_type == "full":
             self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_)
             for k in range(self.precisions_cholesky_.shape[0]):
-                prec_chol = self.precisions_cholesky_[k, ...]
-                self.precisions_[k, ...] = prec_chol @ prec_chol.T
+                prec_chol = self.precisions_cholesky_[k, :, :]
+                self.precisions_[k, :, :] = prec_chol @ prec_chol.T
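For a 3-D array the two spellings swapped by the patch above are equivalent; the explicit colons simply document the expected rank at the call site. A quick NumPy check:

    import numpy as np

    x = np.arange(24).reshape(2, 3, 4)
    # The ellipsis expands to "all remaining axes", so both select the same 2-D slice.
    assert np.array_equal(x[0, ...], x[0, :, :])
    assert x[0, :, :].shape == (3, 4)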
From 038632fc188eb67ce4118b54fb16cae813ab4d6b Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:28:51 +0200
Subject: [PATCH 58/92] revert changes in k-means initialisation

---
 sklearn/mixture/_base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 1ef1940b60a59..e43e1023b9a5f 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -116,7 +116,7 @@ def _initialize_parameters(self, X, random_state, xp=None):
         n_samples, _ = X.shape
 
         if self.init_params == "kmeans":
-            resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype)
+            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
             label = (
                 cluster.KMeans(
                     n_clusters=self.n_components, n_init=1, random_state=random_state
@@ -124,7 +124,7 @@ def _initialize_parameters(self, X, random_state, xp=None):
                 .fit(X)
                 .labels_
             )
-            resp[xp.arange(n_samples), label] = 1
+            resp[np.arange(n_samples), label] = 1
         elif self.init_params == "random":
             resp = xp.asarray(
                 random_state.uniform(size=(n_samples, self.n_components)),
@@ -144,13 +144,13 @@ def _initialize_parameters(self, X, random_state, xp=None):
             for count, index in enumerate(indices):
                 resp[index, count] = 1
         elif self.init_params == "k-means++":
-            resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype)
+            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
             _, indices = kmeans_plusplus(
                 X,
                 self.n_components,
                 random_state=random_state,
             )
-            resp[indices, xp.arange(self.n_components)] = 1
+            resp[indices, np.arange(self.n_components)] = 1
 
         self._initialize(X, resp)

From 18b3fe0437cac0f3aa81e4a04deb2a0949e043b5 Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:59:54 +0200
Subject: [PATCH 59/92] add smoke test for other methods

---
 sklearn/mixture/_base.py                       | 4 +++-
 sklearn/mixture/tests/test_gaussian_mixture.py | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index e43e1023b9a5f..19952b6c8224b 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -386,7 +386,9 @@ def score(self, X, y=None):
         log_likelihood : float
             Log-likelihood of `X` under the Gaussian mixture model.
         """
-        return self.score_samples(X).mean()
+        # check if X is on the same namespace as fitted attributes:
+        xp, _ = get_namespace(X, self.means_)
+        return xp.mean(self.score_samples(X))
 
     def predict(self, X):
         """Predict the labels for the data samples in X using trained model.
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6f51b5242205a..38be79124e43c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1514,6 +1514,13 @@ def test_gaussian_mixture_array_api_compliance( assert device(X) == device(gmm.means_) assert device(X) == device(gmm.covariances_) + # smoke test other methods + # TODO: maybe test with X on different namespace/device as training + gmm.score_samples(X) + gmm.score(X) + gmm.aic(X) + gmm.bic(X) + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 8f00364e7e66d967d7789b45a2d4e0baaf9a0051 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 7 May 2025 12:39:36 +0200 Subject: [PATCH 60/92] add lacking check_is_fitted to BaseMixture.score --- sklearn/mixture/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 19952b6c8224b..7b4f98d04797c 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -386,6 +386,7 @@ def score(self, X, y=None): log_likelihood : float Log-likelihood of `X` under the Gaussian mixture model. """ + check_is_fitted(self) # check if X is on the same namespace as fitted attributes: xp, _ = get_namespace(X, self.means_) return xp.mean(self.score_samples(X)) From 3aaabf5967029e7cdf2c60cfd540236a2e38149e Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 9 May 2025 14:42:44 +0200 Subject: [PATCH 61/92] re-trigger CI From 0084640638dd8f949c079002f34294e8b6550eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 9 May 2025 17:11:52 +0200 Subject: [PATCH 62/92] Add torch import --- sklearn/externals/array_api_compat/torch/linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index e26198b9b562e..3ddf4d009248d 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -11,6 +11,7 @@ from ._aliases import _fix_promotion, sum from torch.linalg import * # noqa: F403 +import torch # torch.linalg doesn't define __all__ # from torch.linalg import __all__ as linalg_all From f9b2946db36902cffabac0c94dbb17cc0727e8ea Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 14 May 2025 11:40:42 +0200 Subject: [PATCH 63/92] different branch for numpy.linalg; only re-raise numpy error --- sklearn/mixture/_gaussian_mixture.py | 46 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8326d8f88bc7b..8dd502f1c9371 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -4,8 +4,11 @@ # SPDX-License-Identifier: BSD-3-Clause import math +import numpy as np from scipy import linalg +from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace + from .._config import get_config from ..externals import array_api_extra as xpx from ..utils import check_array @@ -316,6 +319,20 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): return nk, means, covariances +def _call_cholesky(covariance, xp): + if is_numpy_namespace(xp): + return linalg.cholesky(covariance, lower=True) + else: + return xp.linalg.cholesky(covariance) + + +def 
_call_solve(cov_chol, eye_matrix, xp): + if is_numpy_namespace(xp): + return linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + else: + return xp.linalg.solve(cov_chol, eye_matrix) + + def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. @@ -357,31 +374,22 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): for k in range(covariances.shape[0]): covariance = covariances[k, :, :] try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe - # separate branches for array API and numpy? - cov_chol = xp.linalg.cholesky(covariance) - except xp.linalg.LinAlgError: + cov_chol = _call_cholesky(covariance, xp) + # catch only numpy exceptions, b/c exceptions aren't part of array api spec + except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - - # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? maybe - # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol[k, :, :] = xp.linalg.solve( - cov_chol, xp.eye(n_features, device=device_, dtype=dtype) + precisions_chol[k, :, :] = _call_solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T elif covariance_type == "tied": _, n_features = covariances.shape try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe - # separate branches for array API and numpy? - cov_chol = xp.linalg.cholesky(covariances) - except linalg.LinAlgError: + cov_chol = _call_cholesky(covariances, xp) + # catch only numpy exceptions, since exceptions are not part of array api spec + except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? 
maybe - # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol = xp.linalg.solve( - cov_chol, xp.eye(n_features, dtype=dtype, device=device_) + precisions_chol = _call_solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T else: if xp.any(covariances <= 0.0): From adc992e1530da523e0d15df3d33642693b40157e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 14 May 2025 11:43:23 +0200 Subject: [PATCH 64/92] Remove comment --- sklearn/mixture/_gaussian_mixture.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8dd502f1c9371..076723cc6808d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -869,8 +869,6 @@ def _initialize(self, X, resp, xp=None): resp : array-like of shape (n_samples, n_components) """ - # TODO: check if device_ should be computed in fit_predict and passed down the - # call chain xp, _, device_ = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape weights, means, covariances = None, None, None From 0bb750cb4e828eadec24c4c1319456d98e04e8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 14:54:14 +0200 Subject: [PATCH 65/92] Remove script --- gmm-array-api.py | 72 ------------------------------------------------ 1 file changed, 72 deletions(-) delete mode 100644 gmm-array-api.py diff --git a/gmm-array-api.py b/gmm-array-api.py deleted file mode 100644 index f0da95a8aca9e..0000000000000 --- a/gmm-array-api.py +++ /dev/null @@ -1,72 +0,0 @@ -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -# %% - -import os - -import array_api_strict -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np - -import sklearn -from sklearn.datasets import make_blobs -from sklearn.mixture import GaussianMixture - -os.environ["SCIPY_ARRAY_API"] = "1" - -X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -# X, y = torch.asarray(X), torch.asarray(y) -X, y = array_api_strict.asarray(X), array_api_strict.asarray(y) - -sklearn.set_config(array_api_dispatch=True) - -gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=0, - init_params="random", - tol=1e-5, - max_iter=1000, -).fit(X) -print(gmm.means_) -print(gmm.covariances_) - -fig, ax = plt.subplots() - -X = np.asarray(X) -y = np.asarray(y) - -ax.scatter(X[:, 0], X[:, 1], c=y) - - -def make_ellipses(gmm, ax): - gmm.covariances_ = np.asarray(gmm.covariances_) - colors = ["navy", "turquoise", "darkorange"] - for n, color in enumerate(colors): - if gmm.covariance_type == "full": - covariances = gmm.covariances_[n][:2, :2] - elif gmm.covariance_type == "tied": - covariances = gmm.covariances_[:2, :2] - elif gmm.covariance_type == "diag": - covariances = np.diag(gmm.covariances_[n][:2]) - elif gmm.covariance_type == "spherical": - covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n] - v, w = np.linalg.eigh(covariances) - u = w[0] / np.linalg.norm(w[0]) - angle = np.arctan2(u[1], u[0]) - angle = 180 * angle / np.pi # convert to degrees - v = 2.0 * np.sqrt(2.0) * np.sqrt(v) - ell = mpl.patches.Ellipse( - gmm.means_[n, :2], v[0], v[1], angle=180 + angle, color=color - ) - ell.set_clip_box(ax.bbox) - ell.set_alpha(0.5) - ax.add_artist(ell) - ax.set_aspect("equal", "datalim") - - -make_ellipses(gmm, ax) - -# %% From 7874231810fb8de4956fd40adf7e9ac4c45f5800 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:10:48 +0200 Subject: [PATCH 66/92] update TODOs --- sklearn/externals/array_api_compat/torch/linalg.py | 4 ++++ sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index 3ddf4d009248d..768559bf8aa32 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -11,6 +11,10 @@ from ._aliases import _fix_promotion, sum from torch.linalg import * # noqa: F403 +# TODO Temporary work-around for +# https://github.com/data-apis/array-api-compat/issues/320. Remove when +# array-api-compat 1.12 is released and our vendored array-api-compat has been +# updated. import torch # torch.linalg doesn't define __all__ diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 38be79124e43c..73a209a5559ae 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1515,7 +1515,8 @@ def test_gaussian_mixture_array_api_compliance( assert device(X) == device(gmm.covariances_) # smoke test other methods - # TODO: maybe test with X on different namespace/device as training + # TODO compare with same method on numpy + # TODO add predict and predict_proba gmm.score_samples(X) gmm.score(X) gmm.aic(X) @@ -1597,7 +1598,6 @@ def test_gaussian_mixture_array_api_compliance_with_weights_init( # gmm.fit(X) -# TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( @@ -1631,6 +1631,7 @@ def test_gaussian_mixture_sample_array_api_compliance( covariance_type, array_namespace, device_, dtype, global_random_seed ): """Test that array api works in GaussianMixture.sample().""" + # TODO move this to test_gaussian_mixture_array_api_compliance function? xp = _array_api_for_tests(array_namespace, device_) X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed From 96d8d8ccd2970c780454e2eb306bcc9b3d116e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:45:16 +0200 Subject: [PATCH 67/92] only use X array namespace at prediction time --- sklearn/mixture/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 7b4f98d04797c..aa4ce15058dba 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -387,8 +387,7 @@ def score(self, X, y=None): Log-likelihood of `X` under the Gaussian mixture model. """ check_is_fitted(self) - # check if X is on the same namespace as fitted attributes: - xp, _ = get_namespace(X, self.means_) + xp, _ = get_namespace(X) return xp.mean(self.score_samples(X)) def predict(self, X): From 27a8cd25de69b66e3efc067bb4bb94a40d1eb7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:45:56 +0200 Subject: [PATCH 68/92] Fix predict --- sklearn/mixture/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index aa4ce15058dba..d0fd91d8c07e2 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -405,8 +405,9 @@ def predict(self, X): Component labels. 
""" check_is_fitted(self) + xp, _ = get_namespace(X) X = validate_data(self, X, reset=False) - return self._estimate_weighted_log_prob(X).argmax(axis=1) + return xp.argmax(self._estimate_weighted_log_prob(X)) def predict_proba(self, X): """Evaluate the components' density for each sample. From 4c6271570f9acf821954f4a46eaf3c9ac0a5b9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:57:26 +0200 Subject: [PATCH 69/92] remove TODO --- sklearn/mixture/_bayesian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index d5eace5433be5..76589c8214a99 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -417,9 +417,6 @@ def _check_parameters(self, X, xp=None): ---------- X : array-like of shape (n_samples, n_features) """ - # TODO should we pass xp to the check functions in other words - # should we test BayesianGaussianMixture array API support? - # Maybe we should leave it for a further PR self._check_weights_parameters() self._check_means_parameters(X) self._check_precision_parameters(X) From 303f392fd0175c3b2c50dccfe23111e012e73a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 21:20:57 +0200 Subject: [PATCH 70/92] Fix --- sklearn/mixture/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index d0fd91d8c07e2..30fd42ec50f10 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -407,7 +407,7 @@ def predict(self, X): check_is_fitted(self) xp, _ = get_namespace(X) X = validate_data(self, X, reset=False) - return xp.argmax(self._estimate_weighted_log_prob(X)) + return xp.argmax(self._estimate_weighted_log_prob(X), axis=1) def predict_proba(self, X): """Evaluate the components' density for each sample. 
From c232e39d0d08e039887194ede3d0d9f9dd2dcb1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 09:06:55 +0200 Subject: [PATCH 71/92] Better variable name --- sklearn/mixture/_gaussian_mixture.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 076723cc6808d..31e3b27fc1766 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -190,8 +190,8 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): for k in range(n_components): diff = X - means[k, :] covariances[k, :, :] = ((resp[:, k] * diff.T) @ diff) / nk[k] - my_flat = xp.reshape(covariances[k, :, :], (-1,)) - my_flat[:: n_features + 1] += reg_covar + covariances_flat = xp.reshape(covariances[k, :, :], (-1,)) + covariances_flat[:: n_features + 1] += reg_covar return covariances @@ -220,8 +220,8 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 covariance /= xp.sum(nk) - my_flat = xp.reshape(covariance, (-1,)) - my_flat[:: covariance.shape[0] + 1] += reg_covar + covariance_flat = xp.reshape(covariance, (-1,)) + covariance_flat[:: covariance.shape[0] + 1] += reg_covar return covariance From a43eeb2863457a0200a1543e57f200e6b1cb2621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 09:27:27 +0200 Subject: [PATCH 72/92] Simplify with math.log --- sklearn/mixture/_gaussian_mixture.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 31e3b27fc1766..d193cd2955159 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -561,14 +561,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return ( - -0.5 - * ( - n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device_)) - + log_prob - ) - + log_det - ) + return -0.5 * (n_features * math.log(2 * xp.pi) + log_prob) + log_det class GaussianMixture(BaseMixture): From 3a72ec90329cb4ae43b5c46ab4cc1997f77d4df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 14:14:39 +0200 Subject: [PATCH 73/92] Use math.pi --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index d193cd2955159..2c26312e124de 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -561,7 +561,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return -0.5 * (n_features * math.log(2 * xp.pi) + log_prob) + log_det + return -0.5 * (n_features * math.log(2 * math.pi) + log_prob) + log_det class GaussianMixture(BaseMixture): From 8f4079fa0332c3dc35fbbd19b173d96defa98ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 15:33:09 +0200 Subject: [PATCH 74/92] Improve tests + make score return float --- sklearn/mixture/_base.py | 6 +- 
.../mixture/tests/test_gaussian_mixture.py | 95 +++++++++---------- 2 files changed, 49 insertions(+), 52 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 30fd42ec50f10..929a4655fe688 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -139,8 +139,10 @@ def _initialize_parameters(self, X, random_state, xp=None): indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - # TODO: instead of for-loop, find something more efficient; previous code: + # TODO: when array API supports __setitem__ with fancy indexing we + # can use the previous code: # resp[indices, xp.arange(self.n_components)] = 1 + # Until we use a for loop one on dimension. for count, index in enumerate(indices): resp[index, count] = 1 elif self.init_params == "k-means++": @@ -388,7 +390,7 @@ def score(self, X, y=None): """ check_is_fitted(self) xp, _ = get_namespace(X) - return xp.mean(self.score_samples(X)) + return float(xp.mean(self.score_samples(X))) def predict(self, X): """Predict the labels for the data samples in X using trained model. diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 73a209a5559ae..88651a982331a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1502,28 +1502,55 @@ def test_gaussian_mixture_array_api_compliance( ) gmm.fit(X) - means_ = gmm.means_ - covariances_ = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) + X_xp = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.means_) - assert device(X) == device(gmm.covariances_) - - # smoke test other methods - # TODO compare with same method on numpy - # TODO add predict and predict_proba - gmm.score_samples(X) - gmm.score(X) - gmm.aic(X) - gmm.bic(X) - - assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + gmm_xp = sklearn.clone(gmm) + gmm_xp.fit(X_xp) + + assert get_namespace(gmm_xp.means_)[0] == xp + assert get_namespace(gmm_xp.covariances_)[0] == xp + assert device(gmm_xp.means_) == device(X_xp) + assert device(gmm_xp.covariances_) == device(X_xp) + + xp_predict = gmm_xp.predict(X_xp) + xp_predict_proba = gmm_xp.predict_proba(X_xp) + xp_score_samples = gmm_xp.score_samples(X_xp) + xp_score = gmm_xp.score(X_xp) + xp_aic = gmm_xp.aic(X_xp) + xp_bic = gmm_xp.bic(X_xp) + xp_sample_X, xp_sample_y = gmm_xp.sample(10) + + results = [ + xp_predict, + xp_predict_proba, + xp_score_samples, + xp_sample_X, + xp_sample_y, + ] + for result in results: + assert get_namespace(result)[0] == xp + assert device(result) == device(X_xp) + + for score in [xp_score, xp_aic, xp_bic]: + assert isinstance(score, float) + + # Check methods + assert_allclose(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) + assert_allclose(gmm.predict_proba(X), _convert_to_numpy(xp_predict_proba, xp=xp)) + assert_allclose(gmm.score_samples(X), _convert_to_numpy(xp_score_samples, xp=xp)) + assert_allclose(gmm.score(X), xp_score) + assert_allclose(gmm.aic(X), xp_aic) + assert_allclose(gmm.bic(X), xp_bic) + sample_X, sample_y = gmm.sample(10) + assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp)) + assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) + + # Check fitted attributes + assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, 
xp=xp)) + assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) # TODO test means_init and precisions_init @@ -1621,35 +1648,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_sample_array_api_compliance( - covariance_type, array_namespace, device_, dtype, global_random_seed -): - """Test that array api works in GaussianMixture.sample().""" - # TODO move this to test_gaussian_mixture_array_api_compliance function? - xp = _array_api_for_tests(array_namespace, device_) - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type=covariance_type, - random_state=global_random_seed, - init_params="random", - ) - gmm.fit(X) - X_sample, y_sample = gmm.sample() - - assert get_namespace(X_sample)[0] == xp - assert get_namespace(y_sample)[0] == xp - - assert device(X_sample) == device(X) - assert device(y_sample) == device(X) From de1e5750fae0ce9ed0674e721d56d2595b23e565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 15:49:06 +0200 Subject: [PATCH 75/92] List GaussianMixture in the estimators supporting array API --- doc/modules/array_api.rst | 2 ++ doc/whats_new/upcoming_changes/array-api/30777.feature.rst | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index d24ce3573e7b6..4487a6a599790 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -117,6 +117,8 @@ Estimators - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` - :class:`preprocessing.Normalizer` +- :class:`mixture.GaussianMixture` (with `init_params="random"` or + `init_params="random_from_data"` and `warm_start=False`) Meta-estimators --------------- diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index b3f0751fa0a0d..ab3510a72e6d3 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `init_params` `"random"` or `"random_from_data"` and `warm_start=False` now supports - Array API compatible inputs. + `init_params="random"` or `init_params="random_from_data"` and + `warm_start=False` now supports Array API compatible inputs. 
By :user:`Stefanie Senger ` and :user:`Loïc Estève ` From 910aa1f500ee67fa1b373a05cadc52042d2e6b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Tue, 20 May 2025 14:11:12 +0200 Subject: [PATCH 76/92] Remove temporary array-api-compat work-around --- sklearn/externals/array_api_compat/torch/linalg.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index 9f4a9ab4d69cc..70d7240500ce4 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -4,11 +4,6 @@ from typing import Optional, Union, Tuple from torch.linalg import * # noqa: F403 -# TODO Temporary work-around for -# https://github.com/data-apis/array-api-compat/issues/320. Remove when -# array-api-compat 1.12 is released and our vendored array-api-compat has been -# updated. -import torch # torch.linalg doesn't define __all__ # from torch.linalg import __all__ as linalg_all From 4fe376655027fd24dbd22b7590f760d05b909a41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 6 Jun 2025 11:30:23 +0200 Subject: [PATCH 77/92] lint --- sklearn/utils/tests/test_array_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 16bc41a9d39b3..a36ab3248be0d 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -636,8 +636,8 @@ def test_median(namespace, device, dtype_name, axis): assert get_namespace(result_xp)[0] == xp assert result_xp.device == X_xp.device assert_allclose(result_np, _convert_to_numpy(result_xp, xp=xp)) - - + + @pytest.mark.parametrize( "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations() ) From ce214a6cb6d4b6303599440c78fcdb6c64970bb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 14:18:06 +0200 Subject: [PATCH 78/92] Revert changes to test_bayesian_mixture.py --- sklearn/mixture/tests/test_bayesian_mixture.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 357c21dc702ba..d36543903cb87 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -12,7 +12,6 @@ from sklearn.mixture import BayesianGaussianMixture from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData -from sklearn.utils._array_api import get_namespace from sklearn.utils._testing import ( assert_almost_equal, assert_array_equal, @@ -260,7 +259,6 @@ def test_compare_covar_type(): rand_data = RandomData(rng, scale=7) X = rand_data.X["full"] n_components = rand_data.n_components - xp, _ = get_namespace(X) for prior_type in PRIOR_TYPE: # Computation of the full_covariance @@ -273,7 +271,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) full_covariances = ( bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis] ) @@ -288,7 +286,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + 
bgmm._initialize_parameters(X, np.random.RandomState(0)) tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(tied_covariance, np.mean(full_covariances, 0)) @@ -303,7 +301,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) diag_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis] assert_almost_equal( @@ -320,7 +318,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(spherical_covariances, np.mean(diag_covariances, 1)) From a69cd62d140b8e54a6c27de7cd82cf32efd80f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 14:34:12 +0200 Subject: [PATCH 79/92] Remove unnecessary check_is_fitted --- sklearn/mixture/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 929a4655fe688..776e3d4a79f3d 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -388,7 +388,6 @@ def score(self, X, y=None): log_likelihood : float Log-likelihood of `X` under the Gaussian mixture model. """ - check_is_fitted(self) xp, _ = get_namespace(X) return float(xp.mean(self.score_samples(X))) From 1a0e33be2b716deac67ab9b8e2df0090796534f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 16:39:24 +0200 Subject: [PATCH 80/92] Add all array constructor params to test --- .../mixture/tests/test_gaussian_mixture.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 88651a982331a..44d0d73bf5a0a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -32,6 +32,7 @@ ) from sklearn.utils._array_api import ( _convert_to_numpy, + _get_namespace_device_dtype_ids, device, get_namespace, yield_namespace_device_dtype_combinations, @@ -1485,7 +1486,9 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) @pytest.mark.parametrize("covariance_type", ["full", "tied", "diag", "spherical"]) @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) def test_gaussian_mixture_array_api_compliance( init_params, covariance_type, array_namespace, device_, dtype, global_random_seed @@ -1552,29 +1555,39 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) - # TODO test means_init and precisions_init - @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) -def test_gaussian_mixture_array_api_compliance_with_weights_init( +def 
test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( array_namespace, device_, dtype, global_random_seed ): """Check that array api works with `weights_init`, which unlike other passed arrays is an init param.""" + n_features = 2 + n_components = 3 X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + n_samples=int(1e3), + n_features=n_features, + centers=3, + random_state=global_random_seed, ) + X = X.astype(dtype) xp = _array_api_for_tests(array_namespace, device_) X = xp.asarray(X, device=device_) + means_init = xp.zeros((n_components, n_features), device=device_, dtype=X.dtype) + precisions_init = xp.ones((n_components, n_features), device=device_, dtype=X.dtype) gmm = GaussianMixture( n_components=3, covariance_type="diag", random_state=global_random_seed, init_params="random", + means_init=means_init, + precisions_init=precisions_init, weights_init=xp.asarray([0.1, 0.4, 0.5]), ) @@ -1628,7 +1641,9 @@ def test_gaussian_mixture_array_api_compliance_with_weights_init( @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype From 1dca29ac48fa9945f7d33b86e7dc7960fea86ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 16:59:58 +0200 Subject: [PATCH 81/92] [azure parallel] tweak docstring --- sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 44d0d73bf5a0a..e212572ecd68c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1564,8 +1564,9 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( array_namespace, device_, dtype, global_random_seed ): - """Check that array api works with `weights_init`, which unlike other passed arrays - is an init param.""" + """Check that array api works with array-like constructors: 'means_init', + 'precisions_init' and 'weights_init' + """ n_features = 2 n_components = 3 X, _ = make_blobs( From b990682893dba158fd708fc03a76a626501833fd Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Sat, 14 Jun 2025 14:38:29 +0500 Subject: [PATCH 82/92] Update sklearn/utils/_array_api.py Co-authored-by: Olivier Grisel --- sklearn/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 9a3e0c01d4aaa..3a318ffd60e80 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1032,7 +1032,7 @@ def _tolist(array, xp=None): def _logsumexp(array, axis=None, xp=None): # TODO replace by scipy.special.logsumexp when - # https://github.com/scipy/scipy/pull/22683 is in a relase + # https://github.com/scipy/scipy/pull/22683 is part of a release. 
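# The underlying computation is the numerically stable shift-by-max identity:
# logsumexp(x, axis) == m + log(sum(exp(x - m), axis)) with m = max(x, axis),
# so that exp never overflows for large log-probabilities.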
# The following code is strongly inspired and simplified from # scipy.special._logsumexp.logsumexp xp, _, device = get_namespace_and_device(array, xp=xp) From 72cd185c07b52469c938e34b7f41756cc48353f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 14:25:56 +0200 Subject: [PATCH 83/92] Remove commented out test --- .../mixture/tests/test_gaussian_mixture.py | 41 ------------------- 1 file changed, 41 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e212572ecd68c..b7e2847710438 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1598,47 +1598,6 @@ def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_param assert device(X) == device(gmm.weights_) -# TODO What is the expected behavior when weights init -# and X are not in the same namespace/device? -# It feels like check_array would need a xp argument? -# @pytest.mark.parametrize( -# "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -# ) -# def test_gaussian_mixture_array_api_different_namespaces( -# array_namespace, device_, dtype, global_random_seed -# ): -# """Check that passing `weights_init` in a different namespace during instantiation -# correctly converts to the same namespace as X.""" -# X, _ = make_blobs( -# n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed -# ) - -# xp = _array_api_for_tests(array_namespace, device_) -# X = xp.asarray(X, device=device_) - -# # check with weights_init being a numpy array -# with sklearn.config_context(array_api_dispatch=True): -# gmm = GaussianMixture( -# n_components=3, -# covariance_type="diag", -# random_state=global_random_seed, -# init_params="random", -# weights_init=np.asarray([0.1, 0.4, 0.5]), -# ) -# gmm.fit(X) - -# # check with weights_init being an array_api_strict array -# with sklearn.config_context(array_api_dispatch=True): -# gmm = GaussianMixture( -# n_components=3, -# covariance_type="diag", -# random_state=global_random_seed, -# init_params="random", -# weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), -# ) -# gmm.fit(X) - - @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( From 3af1470e9f0bc00cf4c3949b952465193b030d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 14:36:40 +0200 Subject: [PATCH 84/92] Handle comments --- sklearn/mixture/_base.py | 6 +++--- sklearn/utils/tests/test_array_api.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 776e3d4a79f3d..a9627a0e74e7f 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -142,9 +142,9 @@ def _initialize_parameters(self, X, random_state, xp=None): # TODO: when array API supports __setitem__ with fancy indexing we # can use the previous code: # resp[indices, xp.arange(self.n_components)] = 1 - # Until we use a for loop one on dimension. - for count, index in enumerate(indices): - resp[index, count] = 1 + # Until then we use a for loop on one dimension. 
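+ # (Equivalently: resp[indices[0], 0] = 1, resp[indices[1], 1] = 1, and so
+ # on, so each drawn sample starts as the sole member of one component.)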
+ for col, index in enumerate(indices): + resp[index, col] = 1 elif self.init_params == "k-means++": resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index a36ab3248be0d..5d35d86432f3c 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -662,7 +662,6 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax # if torch on CPU or array api strict on default device # check that _logsumexp works when array API dispatch is disabled - # TODO is there a better way for this if (array_namespace == "torch" and device_ == "cpu") or ( array_namespace == "array_api_strict" and "CPU" in str(device_) ): From ecac6103404c575297182c5e519e74235512d9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 17:29:28 +0200 Subject: [PATCH 85/92] use _call_cholesky --- sklearn/mixture/_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 2c26312e124de..f48b1211660a0 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -441,13 +441,13 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= if covariance_type == "full": precisions_cholesky = xp.asarray( [ - _flipudlr(xp.linalg.cholesky(_flipudlr(precision, xp=xp)), xp=xp) + _flipudlr(_call_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - xp.linalg.cholesky(_flipudlr(precisions, xp=xp)), xp=xp + _call_cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp ) else: precisions_cholesky = xp.sqrt(precisions) From 341b659b1433bacb3c9704e4b6c2f517d03308fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 11:14:51 +0200 Subject: [PATCH 86/92] More explicit use of scipy.linalg --- sklearn/mixture/_gaussian_mixture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index f48b1211660a0..df9c50bfaa0bf 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,7 +5,7 @@ import math import numpy as np -from scipy import linalg +import scipy.linalg from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace @@ -321,14 +321,14 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): def _call_cholesky(covariance, xp): if is_numpy_namespace(xp): - return linalg.cholesky(covariance, lower=True) + return scipy.linalg.cholesky(covariance, lower=True) else: return xp.linalg.cholesky(covariance) def _call_solve(cov_chol, eye_matrix, xp): if is_numpy_namespace(xp): - return linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) else: return xp.linalg.solve(cov_chol, eye_matrix) From 7ffc5c7f5410ef39739c5f8693ff39384788a035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 12:51:26 +0200 Subject: [PATCH 87/92] [azure parallel] Increase rtol for float32 tests + some minor cleanups --- sklearn/mixture/_gaussian_mixture.py | 36 ++++-------- .../mixture/tests/test_gaussian_mixture.py | 55 +++++++++++++------ 
sklearn/utils/_array_api.py | 14 +++++ 3 files changed, 64 insertions(+), 41 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index df9c50bfaa0bf..05d42f68decbc 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,14 +5,16 @@ import math import numpy as np -import scipy.linalg - -from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace from .._config import get_config from ..externals import array_api_extra as xpx from ..utils import check_array -from ..utils._array_api import get_namespace, get_namespace_and_device +from ..utils._array_api import ( + _cholesky, + _linalg_solve, + get_namespace, + get_namespace_and_device, +) from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -319,20 +321,6 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): return nk, means, covariances -def _call_cholesky(covariance, xp): - if is_numpy_namespace(xp): - return scipy.linalg.cholesky(covariance, lower=True) - else: - return xp.linalg.cholesky(covariance) - - -def _call_solve(cov_chol, eye_matrix, xp): - if is_numpy_namespace(xp): - return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) - else: - return xp.linalg.solve(cov_chol, eye_matrix) - - def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. @@ -374,21 +362,21 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): for k in range(covariances.shape[0]): covariance = covariances[k, :, :] try: - cov_chol = _call_cholesky(covariance, xp) + cov_chol = _cholesky(covariance, xp) # catch only numpy exceptions, b/c exceptions aren't part of array api spec except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k, :, :] = _call_solve( + precisions_chol[k, :, :] = _linalg_solve( cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T elif covariance_type == "tied": _, n_features = covariances.shape try: - cov_chol = _call_cholesky(covariances, xp) + cov_chol = _cholesky(covariances, xp) # catch only numpy exceptions, since exceptions are not part of array api spec except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol = _call_solve( + precisions_chol = _linalg_solve( cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T else: @@ -441,13 +429,13 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= if covariance_type == "full": precisions_cholesky = xp.asarray( [ - _flipudlr(_call_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) + _flipudlr(_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - _call_cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp + _cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp ) else: precisions_cholesky = xp.sqrt(precisions) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index b7e2847710438..fcbeaf81f1224 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1491,22 +1491,24 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( ids=_get_namespace_device_dtype_ids, ) def 
test_gaussian_mixture_array_api_compliance( - init_params, covariance_type, array_namespace, device_, dtype, global_random_seed + init_params, covariance_type, array_namespace, device_, dtype ): """Test that array api works in GaussianMixture.fit().""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) + xp = _array_api_for_tests(array_namespace, device_) + + rng = np.random.RandomState(0) + rand_data = RandomData(rng) + X = rand_data.X[covariance_type] + X = X.astype(dtype) + gmm = GaussianMixture( - n_components=3, + n_components=rand_data.n_components, covariance_type=covariance_type, - random_state=global_random_seed, + random_state=0, init_params=init_params, ) - gmm.fit(X) - xp = _array_api_for_tests(array_namespace, device_) X_xp = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): @@ -1541,19 +1543,38 @@ def test_gaussian_mixture_array_api_compliance( assert isinstance(score, float) # Check methods - assert_allclose(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) - assert_allclose(gmm.predict_proba(X), _convert_to_numpy(xp_predict_proba, xp=xp)) - assert_allclose(gmm.score_samples(X), _convert_to_numpy(xp_score_samples, xp=xp)) - assert_allclose(gmm.score(X), xp_score) - assert_allclose(gmm.aic(X), xp_aic) - assert_allclose(gmm.bic(X), xp_bic) + float32_rtol = 1e-4 if dtype == "float32" else 1e-7 + increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + + assert ( + adjusted_rand_score(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) > 0.95 + ) + assert_allclose( + gmm.predict_proba(X), + _convert_to_numpy(xp_predict_proba, xp=xp), + rtol=increased_rtol, + ) + assert_allclose( + gmm.score_samples(X), + _convert_to_numpy(xp_score_samples, xp=xp), + rtol=increased_rtol, + ) + # comparing Python floats so need explicit rtol + assert_allclose(gmm.score(X), xp_score, rtol=float32_rtol) + assert_allclose(gmm.aic(X), xp_aic, rtol=float32_rtol) + assert_allclose(gmm.bic(X), xp_bic, rtol=float32_rtol) sample_X, sample_y = gmm.sample(10) - assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp)) + # generated samples are float64 so need explicit rtol for dtype=float32 + assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp), rtol=float32_rtol) assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) - assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) + assert_allclose( + gmm.covariances_, + _convert_to_numpy(gmm_xp.covariances_, xp=xp), + rtol=increased_rtol, + ) @pytest.mark.parametrize( @@ -1609,7 +1630,7 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype ): X, _ = make_blobs( - n_samples=int(1e3), + n_samples=100, n_features=2, centers=3, ) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 3a318ffd60e80..cbaaa9f5168a9 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1063,3 +1063,17 @@ def _logsumexp(array, axis=None, xp=None): out = out[()] if out.ndim == 0 else out return out + + +def _cholesky(covariance, xp): + if _is_numpy_namespace(xp): + return scipy.linalg.cholesky(covariance, lower=True) + else: + return xp.linalg.cholesky(covariance) + + +def _linalg_solve(cov_chol, eye_matrix, xp): + if _is_numpy_namespace(xp): + return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + else: + return 
xp.linalg.solve(cov_chol, eye_matrix) From 3b95a5f316f274e027179c588732261adfef0b74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 13:03:07 +0200 Subject: [PATCH 88/92] rename variables --- .../mixture/tests/test_gaussian_mixture.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index fcbeaf81f1224..74e2d9740d234 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1520,26 +1520,26 @@ def test_gaussian_mixture_array_api_compliance( assert device(gmm_xp.means_) == device(X_xp) assert device(gmm_xp.covariances_) == device(X_xp) - xp_predict = gmm_xp.predict(X_xp) - xp_predict_proba = gmm_xp.predict_proba(X_xp) - xp_score_samples = gmm_xp.score_samples(X_xp) - xp_score = gmm_xp.score(X_xp) - xp_aic = gmm_xp.aic(X_xp) - xp_bic = gmm_xp.bic(X_xp) - xp_sample_X, xp_sample_y = gmm_xp.sample(10) + predict_xp = gmm_xp.predict(X_xp) + predict_proba_xp = gmm_xp.predict_proba(X_xp) + score_samples_xp = gmm_xp.score_samples(X_xp) + score_xp = gmm_xp.score(X_xp) + aic_xp = gmm_xp.aic(X_xp) + bic_xp = gmm_xp.bic(X_xp) + sample_X_xp, sample_y_xp = gmm_xp.sample(10) results = [ - xp_predict, - xp_predict_proba, - xp_score_samples, - xp_sample_X, - xp_sample_y, + predict_xp, + predict_proba_xp, + score_samples_xp, + sample_X_xp, + sample_y_xp, ] for result in results: assert get_namespace(result)[0] == xp assert device(result) == device(X_xp) - for score in [xp_score, xp_aic, xp_bic]: + for score in [score_xp, aic_xp, bic_xp]: assert isinstance(score, float) # Check methods @@ -1547,26 +1547,26 @@ def test_gaussian_mixture_array_api_compliance( increased_rtol = 5e-4 if dtype == "float32" else 1e-7 assert ( - adjusted_rand_score(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) > 0.95 + adjusted_rand_score(gmm.predict(X), _convert_to_numpy(predict_xp, xp=xp)) > 0.95 ) assert_allclose( gmm.predict_proba(X), - _convert_to_numpy(xp_predict_proba, xp=xp), + _convert_to_numpy(predict_proba_xp, xp=xp), rtol=increased_rtol, ) assert_allclose( gmm.score_samples(X), - _convert_to_numpy(xp_score_samples, xp=xp), + _convert_to_numpy(score_samples_xp, xp=xp), rtol=increased_rtol, ) # comparing Python floats so need explicit rtol - assert_allclose(gmm.score(X), xp_score, rtol=float32_rtol) - assert_allclose(gmm.aic(X), xp_aic, rtol=float32_rtol) - assert_allclose(gmm.bic(X), xp_bic, rtol=float32_rtol) + assert_allclose(gmm.score(X), score_xp, rtol=float32_rtol) + assert_allclose(gmm.aic(X), aic_xp, rtol=float32_rtol) + assert_allclose(gmm.bic(X), bic_xp, rtol=float32_rtol) sample_X, sample_y = gmm.sample(10) # generated samples are float64 so need explicit rtol for dtype=float32 - assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp), rtol=float32_rtol) - assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) + assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=float32_rtol) + assert_allclose(sample_y, _convert_to_numpy(sample_y_xp, xp=xp)) # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) From 45ba1ee8326afff0e2904b08aadb64ee1cb0617f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 15:17:07 +0200 Subject: [PATCH 89/92] [azure parallel] test more precisely when array constructor arguments are passed in + fixes --- sklearn/mixture/_gaussian_mixture.py | 14 
++- .../mixture/tests/test_gaussian_mixture.py | 109 ++++++++---------- 2 files changed, 58 insertions(+), 65 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 05d42f68decbc..cd6523d1d2784 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -51,7 +51,7 @@ def _check_weights(weights, n_components, xp=None): # check normalization atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 - if not xp.all(xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp)): + if not np.allclose(float(xp.abs(1.0 - xp.sum(weights))), 0.0, atol=atol): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" % xp.sum(weights) @@ -105,8 +105,8 @@ def _check_precision_matrix(precision, covariance_type, xp=None): def _check_precisions_full(precisions, covariance_type, xp=None): """Check the precision matrices are symmetric and positive-definite.""" xp, _ = get_namespace(precisions, xp=xp) - for prec in precisions: - _check_precision_matrix(prec, covariance_type, xp=xp) + for i in range(precisions.shape[0]): + _check_precision_matrix(precisions[i, :, :], covariance_type, xp=xp) def _check_precisions(precisions, covariance_type, n_components, n_features, xp=None): @@ -427,10 +427,12 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= components. The shape depends on the covariance_type. """ if covariance_type == "full": - precisions_cholesky = xp.asarray( + precisions_cholesky = xp.stack( [ - _flipudlr(_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) - for precision in precisions + _flipudlr( + _cholesky(_flipudlr(precisions[i, :, :], xp=xp), xp=xp), xp=xp + ) + for i in range(precisions.shape[0]) ] ) elif covariance_type == "tied": diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 74e2d9740d234..f03e86aa547dc 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1490,8 +1490,14 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( yield_namespace_device_dtype_combinations(), ids=_get_namespace_device_dtype_ids, ) +@pytest.mark.parametrize("use_gmm_array_constructor_arguments", [False, True]) def test_gaussian_mixture_array_api_compliance( - init_params, covariance_type, array_namespace, device_, dtype + init_params, + covariance_type, + array_namespace, + device_, + dtype, + use_gmm_array_constructor_arguments, ): """Test that array api works in GaussianMixture.fit().""" xp = _array_api_for_tests(array_namespace, device_) @@ -1501,11 +1507,21 @@ def test_gaussian_mixture_array_api_compliance( X = rand_data.X[covariance_type] X = X.astype(dtype) + if use_gmm_array_constructor_arguments: + additional_kwargs = { + "means_init": rand_data.means.astype(dtype), + "precisions_init": rand_data.precisions[covariance_type].astype(dtype), + "weights_init": rand_data.weights.astype(dtype), + } + else: + additional_kwargs = {} + gmm = GaussianMixture( n_components=rand_data.n_components, covariance_type=covariance_type, random_state=0, init_params=init_params, + **additional_kwargs, ) gmm.fit(X) @@ -1513,6 +1529,13 @@ def test_gaussian_mixture_array_api_compliance( with sklearn.config_context(array_api_dispatch=True): gmm_xp = sklearn.clone(gmm) + for param_name, param_value in additional_kwargs.items(): + arg_xp = xp.asarray(param_value, device=device_) + setattr(gmm_xp, param_name, arg_xp) + + 
print(gmm.means_init) + print(gmm.precisions_init) + print(gmm.weights_init) gmm_xp.fit(X_xp) assert get_namespace(gmm_xp.means_)[0] == xp @@ -1542,10 +1565,27 @@ def test_gaussian_mixture_array_api_compliance( for score in [score_xp, aic_xp, bic_xp]: assert isinstance(score, float) - # Check methods - float32_rtol = 1e-4 if dtype == "float32" else 1e-7 + # Define specific rtol to make tests pass + default_rtol = 1e-4 if dtype == "float32" else 1e-7 + increased_atol = 1e-4 if dtype == "float32" else 0 increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + # Check fitted attributes + assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) + assert_allclose( + gmm.covariances_, + _convert_to_numpy(gmm_xp.covariances_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) + assert_allclose( + gmm.precisions_, + _convert_to_numpy(gmm_xp.precisions_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) + + # Check methods assert ( adjusted_rand_score(gmm.predict(X), _convert_to_numpy(predict_xp, xp=xp)) > 0.95 ) @@ -1553,71 +1593,22 @@ def test_gaussian_mixture_array_api_compliance( gmm.predict_proba(X), _convert_to_numpy(predict_proba_xp, xp=xp), rtol=increased_rtol, + atol=increased_atol, ) assert_allclose( gmm.score_samples(X), _convert_to_numpy(score_samples_xp, xp=xp), rtol=increased_rtol, ) - # comparing Python floats so need explicit rtol - assert_allclose(gmm.score(X), score_xp, rtol=float32_rtol) - assert_allclose(gmm.aic(X), aic_xp, rtol=float32_rtol) - assert_allclose(gmm.bic(X), bic_xp, rtol=float32_rtol) + # comparing Python float so need explicit rtol when X has dtype float32 + assert_allclose(gmm.score(X), score_xp, rtol=default_rtol) + assert_allclose(gmm.aic(X), aic_xp, rtol=default_rtol) + assert_allclose(gmm.bic(X), bic_xp, rtol=default_rtol) sample_X, sample_y = gmm.sample(10) - # generated samples are float64 so need explicit rtol for dtype=float32 - assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=float32_rtol) + # generated samples are float64 so need explicit rtol when X has dtype float32 + assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=default_rtol) assert_allclose(sample_y, _convert_to_numpy(sample_y_xp, xp=xp)) - # Check fitted attributes - assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) - assert_allclose( - gmm.covariances_, - _convert_to_numpy(gmm_xp.covariances_, xp=xp), - rtol=increased_rtol, - ) - - -@pytest.mark.parametrize( - "array_namespace, device_, dtype", - yield_namespace_device_dtype_combinations(), - ids=_get_namespace_device_dtype_ids, -) -def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( - array_namespace, device_, dtype, global_random_seed -): - """Check that array api works with array-like constructors: 'means_init', - 'precisions_init' and 'weights_init' - """ - n_features = 2 - n_components = 3 - X, _ = make_blobs( - n_samples=int(1e3), - n_features=n_features, - centers=3, - random_state=global_random_seed, - ) - X = X.astype(dtype) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - means_init = xp.zeros((n_components, n_features), device=device_, dtype=X.dtype) - precisions_init = xp.ones((n_components, n_features), device=device_, dtype=X.dtype) - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - means_init=means_init, - precisions_init=precisions_init, - weights_init=xp.asarray([0.1, 0.4, 0.5]), - ) 
- - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.weights_) - @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) From 4f89101a5dd2468dd1bdc8fd8f5cb031c2b6cd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 15:35:28 +0200 Subject: [PATCH 90/92] [azure parallel] Remove debug --- sklearn/mixture/tests/test_gaussian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index f03e86aa547dc..42f62e59c1192 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1533,9 +1533,6 @@ def test_gaussian_mixture_array_api_compliance( arg_xp = xp.asarray(param_value, device=device_) setattr(gmm_xp, param_name, arg_xp) - print(gmm.means_init) - print(gmm.precisions_init) - print(gmm.weights_init) gmm_xp.fit(X_xp) assert get_namespace(gmm_xp.means_)[0] == xp From d2ca2097dc7056eab8ccc27550096e4c8d1a5c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 19 Jun 2025 11:14:54 +0200 Subject: [PATCH 91/92] Test more attributes --- sklearn/mixture/tests/test_gaussian_mixture.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 42f62e59c1192..19510b4993329 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1569,12 +1569,19 @@ def test_gaussian_mixture_array_api_compliance( # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) + assert_allclose(gmm.weights_, _convert_to_numpy(gmm_xp.weights_, xp=xp)) assert_allclose( gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp), atol=increased_atol, rtol=increased_rtol, ) + assert_allclose( + gmm.precisions_cholesky_, + _convert_to_numpy(gmm_xp.precisions_cholesky_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) assert_allclose( gmm.precisions_, _convert_to_numpy(gmm_xp.precisions_, xp=xp), From d46840bdf6c3b42342cfb99efef51936c94576f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 19 Jun 2025 11:15:09 +0200 Subject: [PATCH 92/92] Increase tol to make tests pass --- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 19510b4993329..794a4dfc070ce 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1564,8 +1564,8 @@ def test_gaussian_mixture_array_api_compliance( # Define specific rtol to make tests pass default_rtol = 1e-4 if dtype == "float32" else 1e-7 - increased_atol = 1e-4 if dtype == "float32" else 0 - increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + increased_atol = 5e-4 if dtype == "float32" else 0 + increased_rtol = 1e-3 if dtype == "float32" else 1e-7 # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp))
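The dtype-dependent tolerance pattern used above, as a standalone sketch assuming only NumPy: choose rtol from the input dtype before comparing the NumPy fit against the array API fit.

import numpy as np
from numpy.testing import assert_allclose

dtype = "float32"
rtol = 1e-3 if dtype == "float32" else 1e-7  # mirrors increased_rtol above
reference = np.asarray([1.0, 2.0, 3.0], dtype=dtype)
other = reference * (1 + 5e-4)  # a float32-scale discrepancy between backends
assert_allclose(reference, other, rtol=rtol)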