From b04a9f794b48f4df9e85cb3a0035d574a60dcaff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 22 Jan 2025 12:09:45 +0100 Subject: [PATCH 01/92] wip --- gmm-array-api.py | 52 ++++++++++++++++++++++++++++++++++++++++ sklearn/mixture/_base.py | 30 ++++++++++++----------- 2 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 gmm-array-api.py diff --git a/gmm-array-api.py b/gmm-array-api.py new file mode 100644 index 0000000000000..ca4145e5ace23 --- /dev/null +++ b/gmm-array-api.py @@ -0,0 +1,52 @@ +# %% +from sklearn.mixture import GaussianMixture +from sklearn.datasets import make_blobs +import sklearn +import numpy as np +import torch + +import os +os.environ['SCIPY_ARRAY_API'] = '1' + +X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) +X, y = torch.asarray(X), torch.asarray(y) + +sklearn.set_config(array_api_dispatch=True) + +gmm = GaussianMixture(n_components=3, covariance_type="full", random_state=0, init_params="random").fit(X) +print(gmm.means_) +print(gmm.covariances_) + +# %% +import matplotlib.pyplot as plt +import matplotlib as mpl +fig, ax = plt.subplots() + +ax.scatter(X[:, 0], X[:, 1], c=y) + + +def make_ellipses(gmm, ax): + colors = ["navy", "turquoise", "darkorange"] + for n, color in enumerate(colors): + if gmm.covariance_type == "full": + covariances = gmm.covariances_[n][:2, :2] + elif gmm.covariance_type == "tied": + covariances = gmm.covariances_[:2, :2] + elif gmm.covariance_type == "diag": + covariances = np.diag(gmm.covariances_[n][:2]) + elif gmm.covariance_type == "spherical": + covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n] + v, w = np.linalg.eigh(covariances) + u = w[0] / np.linalg.norm(w[0]) + angle = np.arctan2(u[1], u[0]) + angle = 180 * angle / np.pi # convert to degrees + v = 2.0 * np.sqrt(2.0) * np.sqrt(v) + ell = mpl.patches.Ellipse( + gmm.means_[n, :2], v[0], v[1], angle=180 + angle, color=color + ) + ell.set_clip_box(ax.bbox) + ell.set_alpha(0.5) + ax.add_artist(ell) + ax.set_aspect("equal", "datalim") + +make_ellipses(gmm, ax) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index dd50d39b4fdb0..232077d81a840 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -18,6 +18,7 @@ from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data +from ..utils._array_api import get_namespace def _check_shape(param, param_shape, name): @@ -95,7 +96,7 @@ def _check_parameters(self, X): """ pass - def _initialize_parameters(self, X, random_state): + def _initialize_parameters(self, X, random_state, xp): """Initialize the model parameters. 
Parameters @@ -109,7 +110,7 @@ def _initialize_parameters(self, X, random_state): n_samples, _ = X.shape if self.init_params == "kmeans": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) label = ( cluster.KMeans( n_clusters=self.n_components, n_init=1, random_state=random_state @@ -117,26 +118,26 @@ def _initialize_parameters(self, X, random_state): .fit(X) .labels_ ) - resp[np.arange(n_samples), label] = 1 + resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - resp = np.asarray( + resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype ) - resp /= resp.sum(axis=1)[:, np.newaxis] + resp /= resp.sum(axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - resp[indices, np.arange(self.n_components)] = 1 + resp[indices, xp.arange(self.n_components)] = 1 elif self.init_params == "k-means++": - resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( X, self.n_components, random_state=random_state, ) - resp[indices, np.arange(self.n_components)] = 1 + resp[indices, xp.arange(self.n_components)] = 1 self._initialize(X, resp) @@ -210,7 +211,8 @@ def fit_predict(self, X, y=None): labels : array, shape (n_samples,) Component labels. """ - X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_min_samples=2) + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], ensure_min_samples=2) if X.shape[0] < self.n_components: raise ValueError( "Expected n_samples >= n_components " @@ -223,7 +225,7 @@ def fit_predict(self, X, y=None): do_init = not (self.warm_start and hasattr(self, "converged_")) n_init = self.n_init if do_init else 1 - max_lower_bound = -np.inf + max_lower_bound = -xp.inf self.converged_ = False random_state = check_random_state(self.random_state) @@ -233,9 +235,9 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_beg(init) if do_init: - self._initialize_parameters(X, random_state) + self._initialize_parameters(X, random_state, xp) - lower_bound = -np.inf if do_init else self.lower_bound_ + lower_bound = -xp.inf if do_init else self.lower_bound_ if self.max_iter == 0: best_params = self._get_parameters() @@ -258,7 +260,7 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_end(lower_bound, converged) - if lower_bound > max_lower_bound or max_lower_bound == -np.inf: + if lower_bound > max_lower_bound or max_lower_bound == -xp.inf: max_lower_bound = lower_bound best_params = self._get_parameters() best_n_iter = n_iter From e6ba4e4f5d467aa39f7c8d88e5c2af3ebc06f17c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 22 Jan 2025 16:15:46 +0100 Subject: [PATCH 02/92] wip --- gmm-array-api.py | 20 +++++++++++++------- sklearn/mixture/_base.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index ca4145e5ace23..b87b4e819555a 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,25 +1,30 @@ # %% -from sklearn.mixture import GaussianMixture -from sklearn.datasets import make_blobs -import sklearn +import os + import numpy as np import torch -import os -os.environ['SCIPY_ARRAY_API'] = 
'1' +import sklearn +from sklearn.datasets import make_blobs +from sklearn.mixture import GaussianMixture + +os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) X, y = torch.asarray(X), torch.asarray(y) sklearn.set_config(array_api_dispatch=True) -gmm = GaussianMixture(n_components=3, covariance_type="full", random_state=0, init_params="random").fit(X) +gmm = GaussianMixture( + n_components=3, covariance_type="full", random_state=0, init_params="random" +).fit(X) print(gmm.means_) print(gmm.covariances_) # %% -import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.pyplot as plt + fig, ax = plt.subplots() ax.scatter(X[:, 0], X[:, 1], c=y) @@ -49,4 +54,5 @@ def make_ellipses(gmm, ax): ax.add_artist(ell) ax.set_aspect("equal", "datalim") + make_ellipses(gmm, ax) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 232077d81a840..444f25b0512ba 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -16,9 +16,9 @@ from ..cluster import kmeans_plusplus from ..exceptions import ConvergenceWarning from ..utils import check_random_state +from ..utils._array_api import get_namespace from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data -from ..utils._array_api import get_namespace def _check_shape(param, param_shape, name): From 2226a55f1acb5e53f78bdf1e7984a8f3a5e42a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 5 Feb 2025 11:17:06 +0100 Subject: [PATCH 03/92] stuck on linalg.cholesky array API support --- sklearn/mixture/_gaussian_mixture.py | 32 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index a5b3a5ae5c172..79b2f74eb7f34 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,6 +7,7 @@ from scipy import linalg from ..utils import check_array +from ..utils._array_api import get_namespace from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -170,12 +171,14 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. """ + xp, _ = get_namespace(X) n_components, n_features = means.shape - covariances = np.empty((n_components, n_features, n_features), dtype=X.dtype) + covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) for k in range(n_components): diff = X - means[k] - covariances[k] = np.dot(resp[:, k] * diff.T, diff) / nk[k] - covariances[k].flat[:: n_features + 1] += reg_covar + covariances[k] = ((resp[:, k] * diff.T) @ diff) / nk[k] + my_flat = xp.reshape(covariances[k], (-1,)) + my_flat[:: n_features + 1] += reg_covar return covariances @@ -284,8 +287,9 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The covariance matrix of the current components. The shape depends of the covariance_type. 
""" - nk = resp.sum(axis=0) + 10 * np.finfo(resp.dtype).eps - means = np.dot(resp.T, X) / nk[:, np.newaxis] + xp, _ = get_namespace(X) + nk = resp.sum(axis=0) + 10 * xp.finfo(resp.dtype).eps + means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { "full": _estimate_gaussian_covariances_full, "tied": _estimate_gaussian_covariances_tied, @@ -313,6 +317,8 @@ def _compute_precision_cholesky(covariances, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. """ + xp, _ = get_namespace(covariances) + estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " "ill-defined empirical covariance (for instance caused by singleton " @@ -320,7 +326,7 @@ def _compute_precision_cholesky(covariances, covariance_type): "increase reg_covar, or scale the input data." ) dtype = covariances.dtype - if dtype == np.float32: + if dtype == xp.float32: estimate_precision_error_message += ( " The numerical accuracy can also be improved by passing float64" " data instead of float32." @@ -328,14 +334,14 @@ def _compute_precision_cholesky(covariances, covariance_type): if covariance_type == "full": n_components, n_features, _ = covariances.shape - precisions_chol = np.empty((n_components, n_features, n_features), dtype=dtype) + precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: cov_chol = linalg.cholesky(covariance, lower=True) except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) precisions_chol[k] = linalg.solve_triangular( - cov_chol, np.eye(n_features, dtype=dtype), lower=True + cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -344,12 +350,12 @@ def _compute_precision_cholesky(covariances, covariance_type): except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) precisions_chol = linalg.solve_triangular( - cov_chol, np.eye(n_features, dtype=dtype), lower=True + cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T else: - if np.any(np.less_equal(covariances, 0.0)): + if xp.any(xp.less_equal(covariances, 0.0)): raise ValueError(estimate_precision_error_message) - precisions_chol = 1.0 / np.sqrt(covariances) + precisions_chol = 1.0 / xp.sqrt(covariances) return precisions_chol @@ -759,7 +765,7 @@ def _check_parameters(self, X): n_features, ) - def _initialize_parameters(self, X, random_state): + def _initialize_parameters(self, X, random_state, xp): # If all the initial parameters are all provided, then there is no need to run # the initialization. 
compute_resp = ( @@ -768,7 +774,7 @@ def _initialize_parameters(self, X, random_state): or self.precisions_init is None ) if compute_resp: - super()._initialize_parameters(X, random_state) + super()._initialize_parameters(X, random_state, xp) else: self._initialize(X, None) From b1fdee7b0b098efc7684a60c98869d02ad7b9f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 5 Feb 2025 17:40:42 +0100 Subject: [PATCH 04/92] a bit further with xp.cholesky but now linalg.solve_triangular --- sklearn/mixture/_gaussian_mixture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 79b2f74eb7f34..8f4be903eec2c 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -337,10 +337,10 @@ def _compute_precision_cholesky(covariances, covariance_type): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: - cov_chol = linalg.cholesky(covariance, lower=True) - except linalg.LinAlgError: + cov_chol = xp.linalg.cholesky(covariance, upper=False) + except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k] = linalg.solve_triangular( + precisions_chol[k] = xp.linalg.solve_triangular( cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T elif covariance_type == "tied": From 14fb0bae31decb1ecc32aac38bd1df83b5f81d0a Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 14 Feb 2025 11:54:53 +0100 Subject: [PATCH 05/92] more array api --- gmm-array-api.py | 4 ++- sklearn/mixture/_gaussian_mixture.py | 45 +++++++++++++++------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index b87b4e819555a..916e9728faeda 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -11,7 +11,7 @@ os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -X, y = torch.asarray(X), torch.asarray(y) +X_torch, y_torch = torch.asarray(X), torch.asarray(y) sklearn.set_config(array_api_dispatch=True) @@ -56,3 +56,5 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) + +# %% diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8f4be903eec2c..53a764ea1f2fc 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -340,8 +340,8 @@ def _compute_precision_cholesky(covariances, covariance_type): cov_chol = xp.linalg.cholesky(covariance, upper=False) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k] = xp.linalg.solve_triangular( - cov_chol, xp.eye(n_features, dtype=dtype), lower=True + precisions_chol[k] = xp.linalg.solve( + cov_chol, xp.eye(n_features, dtype=dtype) ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -438,20 +438,21 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): log_det_precision_chol : array-like of shape (n_components,) The determinant of the precision matrix for each component. 
""" + xp, _ = get_namespace(matrix_chol) if covariance_type == "full": n_components, _, _ = matrix_chol.shape - log_det_chol = np.sum( - np.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 + log_det_chol = xp.sum( + xp.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 ) elif covariance_type == "tied": - log_det_chol = np.sum(np.log(np.diag(matrix_chol))) + log_det_chol = xp.sum(xp.log(xp.diagonal(matrix_chol))) elif covariance_type == "diag": - log_det_chol = np.sum(np.log(matrix_chol), axis=1) + log_det_chol = xp.sum(xp.log(matrix_chol), axis=1) else: - log_det_chol = n_features * np.log(matrix_chol) + log_det_chol = n_features * xp.log(matrix_chol) return log_det_chol @@ -478,6 +479,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ + xp, _ = get_namespace(X, means, precisions_chol) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -487,35 +489,38 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features) if covariance_type == "full": - log_prob = np.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): - y = np.dot(X, prec_chol) - np.dot(mu, prec_chol) - log_prob[:, k] = np.sum(np.square(y), axis=1) + y = (X @ prec_chol) - (mu @ prec_chol) + log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "tied": - log_prob = np.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) for k, mu in enumerate(means): - y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol) - log_prob[:, k] = np.sum(np.square(y), axis=1) + y = (X @ precisions_chol) - (mu @ precisions_chol) + log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "diag": precisions = precisions_chol**2 log_prob = ( - np.sum((means**2 * precisions), 1) - - 2.0 * np.dot(X, (means * precisions).T) - + np.dot(X**2, precisions.T) + xp.sum((means**2 * precisions), 1) + - 2.0 * (X @ (means * precisions).T) + + (X**2 @ precisions.T) ) elif covariance_type == "spherical": precisions = precisions_chol**2 log_prob = ( - np.sum(means**2, 1) * precisions - - 2 * np.dot(X, means.T * precisions) - + np.outer(row_norms(X, squared=True), precisions) + xp.sum(means**2, 1) * precisions + - 2 * (X @ means.T * precisions) + + xp.outer(row_norms(X, squared=True), precisions) ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return -0.5 * (n_features * np.log(2 * np.pi).astype(X.dtype) + log_prob) + log_det + return ( + -0.5 * (n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype)) + log_prob) + + log_det + ) class GaussianMixture(BaseMixture): From 6010ff79a1bfa0092b2ade04f9a6d039d4d8455b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 19 Feb 2025 11:42:05 +0100 Subject: [PATCH 06/92] wip (problem with weights as numpy arrays) --- examples/mixture/plot_gmm_covariances.py | 3 +++ gmm-array-api.py | 12 +++++++----- sklearn/mixture/_base.py | 11 +++++++---- sklearn/mixture/_gaussian_mixture.py | 16 ++++++++++------ 4 files changed, 27 insertions(+), 15 deletions(-) diff --git 
a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 91a26f518f332..5d752cc15dc9b 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -1,3 +1,4 @@ +# %% """ =============== GMM covariances @@ -136,3 +137,5 @@ def make_ellipses(gmm, ax): plt.show() + +# %% diff --git a/gmm-array-api.py b/gmm-array-api.py index 916e9728faeda..1541d44843c62 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -5,23 +5,25 @@ import torch import sklearn -from sklearn.datasets import make_blobs +from sklearn.datasets import make_blobs, load_iris from sklearn.mixture import GaussianMixture +import array_api_strict os.environ["SCIPY_ARRAY_API"] = "1" X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -X_torch, y_torch = torch.asarray(X), torch.asarray(y) +# X, y = torch.asarray(X), torch.asarray(y) +X, y = array_api_strict.asarray(X), array_api_strict.asarray(y) sklearn.set_config(array_api_dispatch=True) gmm = GaussianMixture( - n_components=3, covariance_type="full", random_state=0, init_params="random" + n_components=3, covariance_type="diag", random_state=0, init_params="random", + tol=1e-5, max_iter=1000 ).fit(X) print(gmm.means_) print(gmm.covariances_) -# %% import matplotlib as mpl import matplotlib.pyplot as plt @@ -57,4 +59,4 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) -# %% + # %% diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 444f25b0512ba..e4f5b51e3b891 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -123,7 +123,7 @@ def _initialize_parameters(self, X, random_state, xp): resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype ) - resp /= resp.sum(axis=1)[:, xp.newaxis] + resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) indices = random_state.choice( @@ -306,8 +306,9 @@ def _e_step(self, X): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ + xp, _ = get_namespace(X) log_prob_norm, log_resp = self._estimate_log_prob_resp(X) - return np.mean(log_prob_norm), log_resp + return xp.mean(log_prob_norm), log_resp @abstractmethod def _m_step(self, X, log_resp): @@ -403,8 +404,9 @@ def predict_proba(self, X): """ check_is_fitted(self) X = validate_data(self, X, reset=False) + xp, _ = get_namespace(X) _, log_resp = self._estimate_log_prob_resp(X) - return np.exp(log_resp) + return xp.exp(log_resp) def sample(self, n_samples=1): """Generate random samples from the fitted Gaussian distribution. 
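The hunks in this patch all apply the same porting recipe: module-level `np.*` calls are swapped for calls on the `xp` namespace object that `get_namespace` infers from the input arrays, so one code path serves NumPy arrays, PyTorch tensors, and `array_api_strict` arrays alike. A minimal standalone sketch of that recipe, using `array_api_compat` directly instead of scikit-learn's `get_namespace` wrapper (the helper function below is illustrative, not part of the patch):

```python
import array_api_compat


def weighted_column_mean(X, w):
    # Fetch the array API namespace shared by the inputs: the wrapped numpy
    # module for ndarrays, the torch wrapper for tensors, and so on.
    xp = array_api_compat.array_namespace(X, w)
    # From here on, use `xp` wherever `np` would normally appear.
    return xp.sum(X * w[:, None], axis=0) / xp.sum(w)
```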
@@ -526,11 +528,12 @@ def _estimate_log_prob_resp(self, X): log_responsibilities : array, shape (n_samples, n_components) logarithm of the responsibilities """ + xp, _ = get_namespace(X) weighted_log_prob = self._estimate_weighted_log_prob(X) log_prob_norm = logsumexp(weighted_log_prob, axis=1) with np.errstate(under="ignore"): # ignore underflow - log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis] + log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis] return log_prob_norm, log_resp def _print_verbose_msg_init_beg(self, n_init): diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 53a764ea1f2fc..2820a954f35b3 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -230,7 +230,8 @@ def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features) The covariance vector of the current components. """ - avg_X2 = np.dot(resp.T, X * X) / nk[:, np.newaxis] + xp, _ = get_namespace(X) + avg_X2 = (resp.T @ (X * X)) / nk[:, xp.newaxis] avg_means2 = means**2 return avg_X2 - avg_means2 + reg_covar @@ -288,7 +289,7 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The shape depends of the covariance_type. """ xp, _ = get_namespace(X) - nk = resp.sum(axis=0) + 10 * xp.finfo(resp.dtype).eps + nk = xp.sum(resp, axis=0) + 10 * xp.finfo(resp.dtype).eps means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { "full": _estimate_gaussian_covariances_full, @@ -353,7 +354,7 @@ def _compute_precision_cholesky(covariances, covariance_type): cov_chol, xp.eye(n_features, dtype=dtype), lower=True ).T else: - if xp.any(xp.less_equal(covariances, 0.0)): + if xp.any(covariances <= 0.0): raise ValueError(estimate_precision_error_message) precisions_chol = 1.0 / xp.sqrt(covariances) return precisions_chol @@ -503,7 +504,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): elif covariance_type == "diag": precisions = precisions_chol**2 log_prob = ( - xp.sum((means**2 * precisions), 1) + xp.sum((means**2 * precisions), axis=1) - 2.0 * (X @ (means * precisions).T) + (X**2 @ precisions.T) ) @@ -814,6 +815,7 @@ def _initialize(self, X, resp): self.precisions_init, self.covariance_type ) + def _m_step(self, X, log_resp): """M step. @@ -825,8 +827,9 @@ def _m_step(self, X, log_resp): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. 
""" + xp, _ = get_namespace(X, log_resp) self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( - X, np.exp(log_resp), self.reg_covar, self.covariance_type + X, xp.exp(log_resp), self.reg_covar, self.covariance_type ) self.weights_ /= self.weights_.sum() self.precisions_cholesky_ = _compute_precision_cholesky( @@ -839,7 +842,8 @@ def _estimate_log_prob(self, X): ) def _estimate_log_weights(self): - return np.log(self.weights_) + xp, _ = get_namespace(self.weights_) + return xp.log(self.weights_) def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm From aa2a3831f3a1fe9d4fc41879ee47727e1c1a3996 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 21 Feb 2025 14:57:27 +0100 Subject: [PATCH 07/92] array api for covariance_type='diag' and init_params='random' --- gmm-array-api.py | 19 +++++++++++++------ sklearn/mixture/_base.py | 2 +- sklearn/mixture/_gaussian_mixture.py | 3 +-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index 1541d44843c62..d96b6a14bd497 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,13 +1,12 @@ # %% import os +import array_api_strict import numpy as np -import torch import sklearn -from sklearn.datasets import make_blobs, load_iris +from sklearn.datasets import make_blobs from sklearn.mixture import GaussianMixture -import array_api_strict os.environ["SCIPY_ARRAY_API"] = "1" @@ -18,8 +17,12 @@ sklearn.set_config(array_api_dispatch=True) gmm = GaussianMixture( - n_components=3, covariance_type="diag", random_state=0, init_params="random", - tol=1e-5, max_iter=1000 + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + tol=1e-5, + max_iter=1000, ).fit(X) print(gmm.means_) print(gmm.covariances_) @@ -29,10 +32,14 @@ fig, ax = plt.subplots() +X = np.asarray(X) +y = np.asarray(y) + ax.scatter(X[:, 0], X[:, 1], c=y) def make_ellipses(gmm, ax): + gmm.covariances_ = np.asarray(gmm.covariances_) colors = ["navy", "turquoise", "darkorange"] for n, color in enumerate(colors): if gmm.covariance_type == "full": @@ -59,4 +66,4 @@ def make_ellipses(gmm, ax): make_ellipses(gmm, ax) - # %% +# %% diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index e4f5b51e3b891..f24db803a29ab 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -288,7 +288,7 @@ def fit_predict(self, X, y=None): # for any value of max_iter and tol (and any random_state). _, log_resp = self._e_step(X) - return log_resp.argmax(axis=1) + return xp.argmax(log_resp, axis=1) def _e_step(self, X): """E step. diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 2820a954f35b3..179a84aede43e 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -815,7 +815,6 @@ def _initialize(self, X, resp): self.precisions_init, self.covariance_type ) - def _m_step(self, X, log_resp): """M step. 
@@ -831,7 +830,7 @@ def _m_step(self, X, log_resp): self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( X, xp.exp(log_resp), self.reg_covar, self.covariance_type ) - self.weights_ /= self.weights_.sum() + self.weights_ /= xp.sum(self.weights_) self.precisions_cholesky_ = _compute_precision_cholesky( self.covariances_, self.covariance_type ) From de4f3a592646729873838b5e9ed414e482cb7ba8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 21 Feb 2025 15:19:19 +0100 Subject: [PATCH 08/92] add simple test --- .../mixture/tests/test_gaussian_mixture.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e8144ada64f67..5ef5573d4e372 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -17,6 +17,7 @@ from sklearn.cluster import KMeans from sklearn.covariance import EmpiricalCovariance from sklearn.datasets import make_spd_matrix +from sklearn.datasets._samples_generator import make_blobs from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture import GaussianMixture @@ -29,7 +30,9 @@ _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, ) +from sklearn.utils._array_api import yield_namespace_device_dtype_combinations from sklearn.utils._testing import ( + _array_api_for_tests, assert_allclose, assert_almost_equal, assert_array_almost_equal, @@ -1470,3 +1473,22 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( # The initial gaussian parameters are not estimated. They are estimated for every # m_step. assert mock.call_count == gm.n_iter_ + + +@pytest.mark.parametrize( + "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): + X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) + xp = _array_api_for_tests(array_namespace, device) + X = xp.asarray(X, device=device) + y = xp.asarray(y, device=device) + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + tol=1e-5, + max_iter=1000, + ).fit(X) From 7974931d9c913ba1e5ab605cf0179ecc35ca6369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 15:28:15 +0100 Subject: [PATCH 09/92] Add comments about tricky bits --- sklearn/mixture/_gaussian_mixture.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 179a84aede43e..47ca220cfe7d2 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -338,9 +338,14 @@ def _compute_precision_cholesky(covariances, covariance_type): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, + # maybe separate branches for array API and numpy? cov_chol = xp.linalg.cholesky(covariance, upper=False) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) + + # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular + # probably separate branches for array API and numpy? 
precisions_chol[k] = xp.linalg.solve( cov_chol, xp.eye(n_features, dtype=dtype) ).T From 08e5f9b88cad823344b8c96125c2f353ab62a18b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 15:42:30 +0100 Subject: [PATCH 10/92] lint --- gmm-array-api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gmm-array-api.py b/gmm-array-api.py index d96b6a14bd497..86fddbeeb97b1 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -1,7 +1,12 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import os import array_api_strict +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np import sklearn @@ -27,9 +32,6 @@ print(gmm.means_) print(gmm.covariances_) -import matplotlib as mpl -import matplotlib.pyplot as plt - fig, ax = plt.subplots() X = np.asarray(X) From 0f525efa104cf50263daf25f627a857bb5d814f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Feb 2025 16:01:13 +0100 Subject: [PATCH 11/92] one more comment --- gmm-array-api.py | 1 + sklearn/mixture/_base.py | 1 + 2 files changed, 2 insertions(+) diff --git a/gmm-array-api.py b/gmm-array-api.py index 86fddbeeb97b1..f0da95a8aca9e 100644 --- a/gmm-array-api.py +++ b/gmm-array-api.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause # %% + import os import array_api_strict diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index f24db803a29ab..4e4c906a054c7 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -530,6 +530,7 @@ def _estimate_log_prob_resp(self, X): """ xp, _ = get_namespace(X) weighted_log_prob = self._estimate_weighted_log_prob(X) + # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support log_prob_norm = logsumexp(weighted_log_prob, axis=1) with np.errstate(under="ignore"): # ignore underflow From 4801e2bc48466d316485a80278ddc4d9f588af01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 14:21:58 +0100 Subject: [PATCH 12/92] revert unwanted change --- examples/mixture/plot_gmm_covariances.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 5d752cc15dc9b..91a26f518f332 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -1,4 +1,3 @@ -# %% """ =============== GMM covariances @@ -137,5 +136,3 @@ def make_ellipses(gmm, ax): plt.show() - -# %% From de1343c72a1a6de908d9e8684e4ba8a3f7e30a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 14:41:35 +0100 Subject: [PATCH 13/92] fix test_bayesian_mixture --- sklearn/mixture/tests/test_bayesian_mixture.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index d17e6710ee5a7..04e6cda745865 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -12,6 +12,7 @@ from sklearn.mixture import BayesianGaussianMixture from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData +from sklearn.utils._array_api import get_namespace from sklearn.utils._testing import ( assert_almost_equal, assert_array_equal, @@ -259,6 +260,7 @@ def test_compare_covar_type(): rand_data = RandomData(rng, scale=7) X = 
rand_data.X["full"] n_components = rand_data.n_components + xp, _ = get_namespace(X) for prior_type in PRIOR_TYPE: # Computation of the full_covariance @@ -271,7 +273,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) full_covariances = ( bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis] ) @@ -286,7 +288,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(tied_covariance, np.mean(full_covariances, 0)) @@ -301,7 +303,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) diag_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis] assert_almost_equal( @@ -318,7 +320,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0)) + bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(spherical_covariances, np.mean(diag_covariances, 1)) From b05eca06826157df44260b7b8b774a27b2a38898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 15:22:59 +0100 Subject: [PATCH 14/92] Compare to numpy result in test --- .../mixture/tests/test_gaussian_mixture.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5ef5573d4e372..5a8c40f00e301 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1480,15 +1480,21 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( ) def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=0, + init_params="random", + ) + gmm.fit(X) + means_ref = gmm.means_ + covariances_ref = gmm.covariances_ + xp = _array_api_for_tests(array_namespace, device) X = xp.asarray(X, device=device) y = xp.asarray(y, device=device) with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=0, - init_params="random", - tol=1e-5, - max_iter=1000, - ).fit(X) + gmm.fit(X) + + assert_allclose(means_ref, gmm.means_) + assert_allclose(covariances_ref, gmm.covariances_) From c35bdd6563798736f932833d6380c5271e43bc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 28 Feb 2025 15:28:00 +0100 Subject: [PATCH 15/92] Use global_random_seed --- sklearn/mixture/tests/test_gaussian_mixture.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5a8c40f00e301..4d17fdc31d4a2 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1478,12 +1478,16 @@ def 
test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize( "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() ) -def test_gaussian_mixture_array_api_compliance(array_namespace, device, dtype): - X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) +def test_gaussian_mixture_array_api_compliance( + array_namespace, device, dtype, global_random_seed +): + X, y = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) gmm = GaussianMixture( n_components=3, covariance_type="diag", - random_state=0, + random_state=global_random_seed, init_params="random", ) gmm.fit(X) From 4516920cf884425caad2075055b1e8646351d6e0 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:30:03 +0100 Subject: [PATCH 16/92] retrigger CI From e9740511b8e290447f20b39e43c079888a733098 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:30:39 +0100 Subject: [PATCH 17/92] retrigger CI From 1a7f262160c143e075401d165467ab341928fcb1 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 12 Mar 2025 14:36:18 +0100 Subject: [PATCH 18/92] retrigger CI [azure parallel] From fb408708d32ef32104392bf837634bd41b97b64a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 13 Mar 2025 15:43:15 +0100 Subject: [PATCH 19/92] A bit further with setting the device more correctly --- sklearn/mixture/_base.py | 7 +- sklearn/mixture/_gaussian_mixture.py | 10 ++- .../mixture/tests/test_gaussian_mixture.py | 13 +++- sklearn/utils/_array_api.py | 68 +++++++++++++++---- 4 files changed, 77 insertions(+), 21 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 4e4c906a054c7..892602415a88a 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -16,7 +16,7 @@ from ..cluster import kmeans_plusplus from ..exceptions import ConvergenceWarning from ..utils import check_random_state -from ..utils._array_api import get_namespace +from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import Interval, StrOptions from ..utils.validation import check_is_fitted, validate_data @@ -120,8 +120,11 @@ def _initialize_parameters(self, X, random_state, xp): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": + xp, _, device = get_namespace_and_device(X) resp = xp.asarray( - random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype + random_state.uniform(size=(n_samples, self.n_components)), + dtype=X.dtype, + device=device, ) resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 47ca220cfe7d2..93705882d1486 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,7 +7,7 @@ from scipy import linalg from ..utils import check_array -from ..utils._array_api import get_namespace +from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -485,7 +485,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ - xp, _ = get_namespace(X, means, precisions_chol) + xp, _, device = get_namespace_and_device(X, means, precisions_chol) n_samples, n_features = 
X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -524,7 +524,11 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` return ( - -0.5 * (n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype)) + log_prob) + -0.5 + * ( + n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device)) + + log_prob + ) + log_det ) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 4d17fdc31d4a2..62d3c606124d4 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -30,7 +30,10 @@ _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, ) -from sklearn.utils._array_api import yield_namespace_device_dtype_combinations +from sklearn.utils._array_api import ( + _convert_to_numpy, + yield_namespace_device_dtype_combinations, +) from sklearn.utils._testing import ( _array_api_for_tests, assert_allclose, @@ -1500,5 +1503,9 @@ def test_gaussian_mixture_array_api_compliance( with sklearn.config_context(array_api_dispatch=True): gmm.fit(X) - assert_allclose(means_ref, gmm.means_) - assert_allclose(covariances_ref, gmm.covariances_) + # TODO is there an easy way to test device? device can be None or 'cpu' in + # the numpy case ... + # assert gmm.means_.device == device + # assert gmm.covariances_.device == device + assert_allclose(means_ref, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_ref, _convert_to_numpy(gmm.covariances_, xp=xp)) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 65503a0674a70..e65ebcce169b2 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -82,6 +82,19 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): ): yield array_namespace, device, dtype yield array_namespace, "mps", "float32" + + elif array_namespace == "array_api_strict": + try: + import array_api_strict # noqa + + yield array_namespace, array_api_strict.Device("CPU_DEVICE"), "float64" + yield array_namespace, array_api_strict.Device("device1"), "float32" + except ImportError: + # Those combinations will typically be skipped by pytest if + # array_api_strict is not installed but we still need to see them in + # the test output. + yield array_namespace, "CPU_DEVICE", "float64" + yield array_namespace, "device1", "float32" else: yield array_namespace, None, None @@ -582,12 +595,14 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): if namespace.__name__ == "array_api_strict" and hasattr( namespace, "set_array_api_strict_flags" ): - namespace.set_array_api_strict_flags(api_version="2023.12") + namespace.set_array_api_strict_flags(api_version="2024.12") return namespace, is_array_api_compliant -def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)): +def get_namespace_and_device( + *array_list, remove_none=True, remove_types=(str,), xp=None +): """Combination into one single function of `get_namespace` and `device`. Parameters @@ -598,6 +613,10 @@ def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)) Whether to ignore None objects passed in arrays. remove_types : tuple or list, default=(str,) Types to ignore in the arrays. + xp : module, default=None + Precomputed array namespace module. 
When passed, typically from a caller + that has already performed inspection of its own inputs, skips array + namespace inspection. Returns ------- @@ -610,16 +629,20 @@ def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)) device : device `device` object (see the "Device Support" section of the array API spec). """ + skip_remove_kwargs = dict(remove_none=False, remove_types=[]) + array_list = _remove_non_arrays( *array_list, remove_none=remove_none, remove_types=remove_types, ) + arrays_device = device(*array_list, **skip_remove_kwargs) - skip_remove_kwargs = dict(remove_none=False, remove_types=[]) + if xp is None: + xp, is_array_api = get_namespace(*array_list, **skip_remove_kwargs) + else: + xp, is_array_api = xp, True - xp, is_array_api = get_namespace(*array_list, **skip_remove_kwargs) - arrays_device = device(*array_list, **skip_remove_kwargs) if is_array_api: return xp, is_array_api, arrays_device else: @@ -769,49 +792,66 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None): return sum_ / scale +def _xlogy(x, y, xp=None): + # TODO: Remove this once https://github.com/scipy/scipy/issues/21736 is fixed + xp, _, device_ = get_namespace_and_device(x, y, xp=xp) + + with numpy.errstate(divide="ignore", invalid="ignore"): + temp = x * xp.log(y) + return xp.where(x == 0.0, xp.asarray(0.0, dtype=temp.dtype, device=device_), temp) + + def _nanmin(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmin(X, axis=axis)) else: mask = xp.isnan(X) - X = xp.min(xp.where(mask, xp.asarray(+xp.inf, device=device(X)), X), axis=axis) + X = xp.min( + xp.where(mask, xp.asarray(+xp.inf, dtype=X.dtype, device=device_), X), + axis=axis, + ) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask): - X = xp.where(mask, xp.asarray(xp.nan), X) + X = xp.where(mask, xp.asarray(xp.nan, dtype=X.dtype, device=device_), X) return X def _nanmax(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmax(X, axis=axis)) else: mask = xp.isnan(X) - X = xp.max(xp.where(mask, xp.asarray(-xp.inf, device=device(X)), X), axis=axis) + X = xp.max( + xp.where(mask, xp.asarray(-xp.inf, dtype=X.dtype, device=device_), X), + axis=axis, + ) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask): - X = xp.where(mask, xp.asarray(xp.nan), X) + X = xp.where(mask, xp.asarray(xp.nan, dtype=X.dtype, device=device_), X) return X def _nanmean(X, axis=None, xp=None): # TODO: refactor once nan-aware reductions are standardized: # https://github.com/data-apis/array-api/issues/621 - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) if _is_numpy_namespace(xp): return xp.asarray(numpy.nanmean(X, axis=axis)) else: mask = xp.isnan(X) - total = xp.sum(xp.where(mask, xp.asarray(0.0, device=device(X)), X), axis=axis) + total = xp.sum( + xp.where(mask, xp.asarray(0.0, dtype=X.dtype, device=device_), X), axis=axis + ) count = xp.sum(xp.astype(xp.logical_not(mask), X.dtype), axis=axis) return total / count @@ -868,6 +908,8 @@ def _convert_to_numpy(array, 
xp):
         return array.cpu().numpy()
     elif xp_name in {"array_api_compat.cupy", "cupy"}:  # pragma: nocover
         return array.get()
+    elif xp_name in {"array_api_strict"}:
+        return numpy.asarray(xp.asarray(array, device=xp.Device("CPU_DEVICE")))
 
     return numpy.asarray(array)
 

From f2eba56128d6de80217cde6d8a3422f1b0697126 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 16:51:03 +0100
Subject: [PATCH 20/92] Add our own implementation of logsumexp [azure parallel]

---
 sklearn/mixture/_base.py    | 7 +++----
 sklearn/utils/_array_api.py | 7 +++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 892602415a88a..9a364cd8c01ea 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -9,14 +9,13 @@
 from time import time
 
 import numpy as np
-from scipy.special import logsumexp
 
 from .. import cluster
 from ..base import BaseEstimator, DensityMixin, _fit_context
 from ..cluster import kmeans_plusplus
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
-from ..utils._array_api import get_namespace, get_namespace_and_device
+from ..utils._array_api import _logsumexp, get_namespace, get_namespace_and_device
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -352,7 +351,7 @@ def score_samples(self, X):
         check_is_fitted(self)
         X = validate_data(self, X, reset=False)
 
-        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)
+        return _logsumexp(self._estimate_weighted_log_prob(X), axis=1)
 
     def score(self, X, y=None):
         """Compute the per-sample average log-likelihood of the given data X.
@@ -534,7 +533,7 @@ def _estimate_log_prob_resp(self, X):
         xp, _ = get_namespace(X)
         weighted_log_prob = self._estimate_weighted_log_prob(X)
         # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support
-        log_prob_norm = logsumexp(weighted_log_prob, axis=1)
+        log_prob_norm = _logsumexp(weighted_log_prob, axis=1)
 
         with np.errstate(under="ignore"):  # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
         return log_prob_norm, log_resp
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index e65ebcce169b2..976044525c669 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1150,3 +1150,10 @@ def _tolist(array, xp=None):
         return array.tolist()
     array_np = _convert_to_numpy(array, xp=xp)
     return [element.item() for element in array_np]
+
+
+def _logsumexp(array, axis=None, xp=None):
+    # TODO replace by scipy.special.logsumexp when
+    # https://github.com/scipy/scipy/pull/22683 is in a release
+    xp, _ = get_namespace(array, xp=xp)
+    return xp.log(xp.sum(xp.exp(array), axis=axis))

From a0f8d2598b09569bd1d2ae581fbdfed4298585cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 22:42:56 +0100
Subject: [PATCH 21/92] Fix implementation of logsumexp

---
 sklearn/mixture/_base.py    |  2 +-
 sklearn/utils/_array_api.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 9a364cd8c01ea..6e032f560f256 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -532,8 +532,8 @@ def _estimate_log_prob_resp(self, X):
         """
         xp, _ = get_namespace(X)
         weighted_log_prob = self._estimate_weighted_log_prob(X)
-        # TODO scipy.special.logsumexp needs scipy >= 1.15 for array API support
         log_prob_norm = _logsumexp(weighted_log_prob, axis=1)
+
         with np.errstate(under="ignore"):  # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 976044525c669..9642b5591599a 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1155,5 +1155,32 @@ def _tolist(array, xp=None):
 def _logsumexp(array, axis=None, xp=None):
     # TODO replace by scipy.special.logsumexp when
     # https://github.com/scipy/scipy/pull/22683 is in a release
+    # The following code is strongly inspired and simplified from
+    # scipy.special._logsumexp.logsumexp
     xp, _ = get_namespace(array, xp=xp)
-    return xp.log(xp.sum(xp.exp(array), axis=axis))
+    axis = tuple(range(array.ndim)) if axis is None else axis
+
+    supported_dtypes = supported_float_dtypes(xp)
+    if array.dtype not in supported_dtypes:
+        array = xp.asarray(array, dtype=supported_dtypes[0])
+    array_max = xp.max(array, axis=axis, keepdims=True)
+    index_max = array == array_max
+
+    array = xp.asarray(array, copy=True)
+    array[index_max] = -xp.inf
+    i_max_dt = xp.astype(index_max, array.dtype)
+    m = xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=array.dtype)
+    # device=a_max.device is needed to avoid https://github.com/scipy/scipy/issues/22680
+    shift = xp.where(
+        xp.isfinite(array_max),
+        array_max,
+        xp.asarray(0, dtype=array_max.dtype, device=array_max.device),
+    )
+    exp = xp.exp(array - shift)
+    s = xp.sum(exp, axis=axis, keepdims=True, dtype=exp.dtype)
+    s = xp.where(s == 0, s, s / m)
+    out = xp.log1p(s) + xp.log(m) + array_max
+    out = xp.squeeze(out, axis=axis)
+    out = out[()] if out.ndim == 0 else out
+
+    return out

From 53e9917600af92b3decf1fe4dbbb5ff117bc71ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 14 Mar 2025 23:29:22 +0100
Subject: [PATCH 22/92] Fix for older numpy versions

---
 sklearn/mixture/_gaussian_mixture.py | 2 +-
 sklearn/utils/_array_api.py          | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 93705882d1486..73d2dcfeaeae3 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -340,7 +340,7 @@ def _compute_precision_cholesky(covariances, covariance_type):
             try:
                 # TODO we are using xp.linalg instead of scipy.linalg.cholesky,
                 # maybe separate branches for array API and numpy?
-                cov_chol = xp.linalg.cholesky(covariance, upper=False)
+                cov_chol = xp.linalg.cholesky(covariance)
             except xp.linalg.LinAlgError:
                 raise ValueError(estimate_precision_error_message)
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 9642b5591599a..ccfee8598fe12 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1157,7 +1157,7 @@ def _logsumexp(array, axis=None, xp=None):
     # https://github.com/scipy/scipy/pull/22683 is in a release
     # The following code is strongly inspired and simplified from
     # scipy.special._logsumexp.logsumexp
-    xp, _ = get_namespace(array, xp=xp)
+    xp, _, device = get_namespace_and_device(array, xp=xp)
     axis = tuple(range(array.ndim)) if axis is None else axis
 
@@ -1170,11 +1170,11 @@ def _logsumexp(array, axis=None, xp=None):
     array[index_max] = -xp.inf
     i_max_dt = xp.astype(index_max, array.dtype)
     m = xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=array.dtype)
-    # device=a_max.device is needed to avoid https://github.com/scipy/scipy/issues/22680
+    # Specifying device explicitly is the fix for https://github.com/scipy/scipy/issues/22680
     shift = xp.where(
         xp.isfinite(array_max),
         array_max,
-        xp.asarray(0, dtype=array_max.dtype, device=array_max.device),
+        xp.asarray(0, dtype=array_max.dtype, device=device),
     )
     exp = xp.exp(array - shift)
     s = xp.sum(exp, axis=axis, keepdims=True, dtype=exp.dtype)

From ac66a02af6de0f894544773024b50b045b722557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Sat, 15 Mar 2025 08:58:36 +0100
Subject: [PATCH 23/92] [azure parallel] Add changelog template

---
 doc/whats_new/upcoming_changes/array-api/30777.feature.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/array-api/30777.feature.rst

diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst
new file mode 100644
index 0000000000000..6afe4e345e191
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst
@@ -0,0 +1,5 @@
+- :class:`sklearn.gaussian_mixture.GaussianMixture` with
+  `initialization="random"` (TODO double-check which parameters actually are
+  supported, for example kmeans-based initialization will not work for now) now
+  support Array API compatible inputs.
+ By :user:`Stefanie Senger ` and :user:`Loïc Estève ` From dfa92d9d073096bacd1899f9ac4990d40c51f689 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 18 Mar 2025 17:02:57 +0100 Subject: [PATCH 24/92] Remove "# noqa" inline comment --- sklearn/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 4c9f8071b6158..652fc8c01db8d 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -85,7 +85,7 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): elif array_namespace == "array_api_strict": try: - import array_api_strict # noqa + import array_api_strict yield array_namespace, array_api_strict.Device("CPU_DEVICE"), "float64" yield array_namespace, array_api_strict.Device("device1"), "float32" From 5f440a9762d0e8f5b5042cbdbfa3b156cd28dcc1 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 11:38:21 +0100 Subject: [PATCH 25/92] add test for _logsumexp --- sklearn/utils/tests/test_array_api.py | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 40548406d84f2..9b199711fb4ab 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -3,6 +3,7 @@ import numpy import pytest +import scipy from numpy.testing import assert_allclose from sklearn._config import config_context @@ -17,6 +18,7 @@ _fill_or_add_to_diagonal, _is_numpy_namespace, _isin, + _logsumexp, _max_precision_float_dtype, _nanmax, _nanmean, @@ -635,3 +637,29 @@ def test_sparse_device(csr_container, dispatch): assert get_namespace_and_device(a, numpy.array([1]))[2] is None except ImportError: raise SkipTest("array_api_compat is not installed") + + +@pytest.mark.parametrize( + "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations() +) +@pytest.mark.parametrize("axis", [0, 1, None]) +def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, axis): + xp = _array_api_for_tests(array_namespace, device_) + array_np = numpy.asarray( + [ + [0, 3, 1000], + [2, -1, 1000], + [numpy.inf, 0, 0], + [numpy.nan, 8, -numpy.inf], + [4, 0, 5], + ], + dtype=dtype_name, + ) + array_xp = xp.asarray(array_np, device=device_) + + res_np = scipy.special.logsumexp(array_np, axis=axis) + + with config_context(array_api_dispatch=True): + res_xp = _logsumexp(array_xp, axis=axis) + res_xp = _convert_to_numpy(res_xp, xp) + assert_array_equal(res_np, res_xp) From dd59446743638c6f527445de63243003517b0714 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 16:49:13 +0100 Subject: [PATCH 26/92] slightly improve tests --- .../array-api/30777.feature.rst | 8 ++--- sklearn/mixture/_base.py | 2 +- .../mixture/tests/test_gaussian_mixture.py | 33 ++++++++++++------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 6afe4e345e191..096b851ea11e5 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,5 +1,3 @@ -- :class:`sklearn.gaussian_mixture.GaussianMixture` with - `initialization="random"` (TODO double-check which parameters actually are - supported, for example kmeans-based initialization will not work for now) now - support Array API compatible inputs. 
- By :user:`Stefanie Senger ` and :user:`Loïc Estève ` +- :class:`sklearn.gaussian_mixture.GaussianMixture` with `initialization="random"`, + `covariance_type="diag"` and `warm_start=False` now supports Array API compatible + inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 6e032f560f256..0995e06f4be99 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -119,7 +119,7 @@ def _initialize_parameters(self, X, random_state, xp): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - xp, _, device = get_namespace_and_device(X) + xp, _, device = get_namespace_and_device(X, xp=xp) resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype, diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ebd0b2ab818cc..d27d93adc25be 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1484,7 +1484,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( def test_gaussian_mixture_array_api_compliance( array_namespace, device, dtype, global_random_seed ): - X, y = make_blobs( + X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) gmm = GaussianMixture( @@ -1493,19 +1493,28 @@ def test_gaussian_mixture_array_api_compliance( random_state=global_random_seed, init_params="random", ) + + gmm_dispatch = copy.deepcopy(gmm) + gmm.fit(X) - means_ref = gmm.means_ - covariances_ref = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device) X = xp.asarray(X, device=device) - y = xp.asarray(y, device=device) - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - # TODO is there an easy way to test device? device can be None or 'cpu' in - # the numpy case ... 
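
For reference, the round trip this test exercises, sketched as a standalone script. This assumes the state of this branch plus torch and array-api-compat being installed; it is not runnable against released scikit-learn:

import sklearn
import torch
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

X_np, _ = make_blobs(n_samples=1000, n_features=2, centers=3, random_state=0)

# reference fit on NumPy, dispatch disabled
gmm_ref = GaussianMixture(
    n_components=3, covariance_type="diag", init_params="random", random_state=0
).fit(X_np)

# same configuration on torch tensors with array API dispatch enabled
with sklearn.config_context(array_api_dispatch=True):
    gmm_xp = GaussianMixture(
        n_components=3, covariance_type="diag", init_params="random", random_state=0
    ).fit(torch.asarray(X_np))

# fitted parameters stay in the input namespace (torch here)
print(type(gmm_xp.means_), gmm_xp.means_.device)
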
- # assert gmm.means_.device == device - # assert gmm.covariances_.device == device - assert_allclose(means_ref, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_ref, _convert_to_numpy(gmm.covariances_, xp=xp)) + with sklearn.config_context(array_api_dispatch=True): + gmm_dispatch.fit(X) + + if array_namespace == "numpy": + assert gmm_dispatch.means_.device in ["cpu", None] + assert gmm_dispatch.covariances_.device in ["cpu", None] + elif array_namespace == "torch": + assert gmm_dispatch.means_.device.type == device + assert gmm_dispatch.covariances_.device.type == device + else: + assert gmm_dispatch.means_.device == device + assert gmm_dispatch.covariances_.device == device + + assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) + assert_allclose( + gmm.covariances_, _convert_to_numpy(gmm_dispatch.covariances_, xp=xp) + ) From 9e93dfa22ef36c11df3633010e1131b588c301c7 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 19 Mar 2025 17:01:46 +0100 Subject: [PATCH 27/92] improve device checking --- .../mixture/tests/test_gaussian_mixture.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index d27d93adc25be..eb938765a0425 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -32,6 +32,7 @@ ) from sklearn.utils._array_api import ( _convert_to_numpy, + device, yield_namespace_device_dtype_combinations, ) from sklearn.utils._testing import ( @@ -1479,10 +1480,10 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize( - "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - array_namespace, device, dtype, global_random_seed + array_namespace, device_, dtype, global_random_seed ): X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed @@ -1498,21 +1499,17 @@ def test_gaussian_mixture_array_api_compliance( gmm.fit(X) - xp = _array_api_for_tests(array_namespace, device) - X = xp.asarray(X, device=device) + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): gmm_dispatch.fit(X) - if array_namespace == "numpy": - assert gmm_dispatch.means_.device in ["cpu", None] - assert gmm_dispatch.covariances_.device in ["cpu", None] - elif array_namespace == "torch": - assert gmm_dispatch.means_.device.type == device - assert gmm_dispatch.covariances_.device.type == device - else: - assert gmm_dispatch.means_.device == device - assert gmm_dispatch.covariances_.device == device + assert ( + device(X) + == device(gmm_dispatch.means_) + == device(gmm_dispatch.covariances_) + ) assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) assert_allclose( From 76cf0fae5e6dbfef65edd73649af2176ab220967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 14:47:24 +0100 Subject: [PATCH 28/92] tweak --- .../mixture/tests/test_gaussian_mixture.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index eb938765a0425..24da5376331a3 100644 --- 
a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1495,23 +1495,18 @@ def test_gaussian_mixture_array_api_compliance( init_params="random", ) - gmm_dispatch = copy.deepcopy(gmm) - gmm.fit(X) + means_ = gmm.means_ + covariances_ = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device_) X = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): - gmm_dispatch.fit(X) + gmm.fit(X) - assert ( - device(X) - == device(gmm_dispatch.means_) - == device(gmm_dispatch.covariances_) - ) + assert device(X) == device(gmm.means_) + assert device(X) == device(gmm.covariances_) - assert_allclose(gmm.means_, _convert_to_numpy(gmm_dispatch.means_, xp=xp)) - assert_allclose( - gmm.covariances_, _convert_to_numpy(gmm_dispatch.covariances_, xp=xp) - ) + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 489c3e3a4b73ea803cefd9f791b2d5743846780a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 15:39:30 +0100 Subject: [PATCH 29/92] Pass xp along the call chain --- sklearn/mixture/_base.py | 32 ++++++------- sklearn/mixture/_bayesian_mixture.py | 6 +-- sklearn/mixture/_gaussian_mixture.py | 68 +++++++++++++++------------- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 0995e06f4be99..55ba77a6ce997 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -95,7 +95,7 @@ def _check_parameters(self, X): """ pass - def _initialize_parameters(self, X, random_state, xp): + def _initialize_parameters(self, X, random_state, xp=None): """Initialize the model parameters. Parameters @@ -249,8 +249,8 @@ def fit_predict(self, X, y=None): for n_iter in range(1, self.max_iter + 1): prev_lower_bound = lower_bound - log_prob_norm, log_resp = self._e_step(X) - self._m_step(X, log_resp) + log_prob_norm, log_resp = self._e_step(X, xp=xp) + self._m_step(X, log_resp, xp=xp) lower_bound = self._compute_lower_bound(log_resp, log_prob_norm) change = lower_bound - prev_lower_bound @@ -288,11 +288,11 @@ def fit_predict(self, X, y=None): # Always do a final e-step to guarantee that the labels returned by # fit_predict(X) are always consistent with fit(X).predict(X) # for any value of max_iter and tol (and any random_state). - _, log_resp = self._e_step(X) + _, log_resp = self._e_step(X, xp=xp) return xp.argmax(log_resp, axis=1) - def _e_step(self, X): + def _e_step(self, X, xp=None): """E step. Parameters @@ -308,8 +308,8 @@ def _e_step(self, X): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ - xp, _ = get_namespace(X) - log_prob_norm, log_resp = self._estimate_log_prob_resp(X) + xp, _ = get_namespace(X, xp=xp) + log_prob_norm, log_resp = self._estimate_log_prob_resp(X, xp=xp) return xp.mean(log_prob_norm), log_resp @abstractmethod @@ -407,7 +407,7 @@ def predict_proba(self, X): check_is_fitted(self) X = validate_data(self, X, reset=False) xp, _ = get_namespace(X) - _, log_resp = self._estimate_log_prob_resp(X) + _, log_resp = self._estimate_log_prob_resp(X, xp=xp) return xp.exp(log_resp) def sample(self, n_samples=1): @@ -472,7 +472,7 @@ def sample(self, n_samples=1): return (X, y) - def _estimate_weighted_log_prob(self, X): + def _estimate_weighted_log_prob(self, X, xp=None): """Estimate the weighted log-probabilities, log P(X | Z) + log weights. 
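
The pattern of this commit in isolation: resolve the namespace once at the public entry point and thread it through, so private helpers do not re-inspect their inputs on every call. A minimal sketch using only `get_namespace` (the function names are illustrative):

import numpy as np
from sklearn.utils._array_api import get_namespace

def public_entry(X, xp=None):
    # inspect X once; xp is the array namespace (numpy, torch, ...)
    xp, _ = get_namespace(X, xp=xp)
    return _helper(X, xp=xp)

def _helper(X, xp=None):
    # with xp passed in, this resolves without touching X again
    xp, _ = get_namespace(X, xp=xp)
    return xp.sum(X, axis=0)

print(public_entry(np.ones((2, 3))))  # [2. 2. 2.]
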
Parameters @@ -483,10 +483,10 @@ def _estimate_weighted_log_prob(self, X): ------- weighted_log_prob : array, shape (n_samples, n_component) """ - return self._estimate_log_prob(X) + self._estimate_log_weights() + return self._estimate_log_prob(X, xp=xp) + self._estimate_log_weights(xp=xp) @abstractmethod - def _estimate_log_weights(self): + def _estimate_log_weights(self, xp=None): """Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm. Returns @@ -496,7 +496,7 @@ def _estimate_log_weights(self): pass @abstractmethod - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): """Estimate the log-probabilities log P(X | Z). Compute the log-probabilities per each component for each sample. @@ -511,7 +511,7 @@ def _estimate_log_prob(self, X): """ pass - def _estimate_log_prob_resp(self, X): + def _estimate_log_prob_resp(self, X, xp=None): """Estimate log probabilities and responsibilities for each sample. Compute the log probabilities, weighted log probabilities per @@ -530,9 +530,9 @@ def _estimate_log_prob_resp(self, X): log_responsibilities : array, shape (n_samples, n_components) logarithm of the responsibilities """ - xp, _ = get_namespace(X) - weighted_log_prob = self._estimate_weighted_log_prob(X) - log_prob_norm = _logsumexp(weighted_log_prob, axis=1) + xp, _ = get_namespace(X, xp=xp) + weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp) + log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp) with np.errstate(under="ignore"): # ignore underflow diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 7de5cc844b098..babe54aeb7693 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -718,7 +718,7 @@ def _estimate_wishart_spherical(self, nk, xk, sk): # Contrary to the original bishop book, we normalize the covariances self.covariances_ /= self.degrees_of_freedom_ - def _m_step(self, X, log_resp): + def _m_step(self, X, log_resp, xp=None): """M step. Parameters @@ -738,7 +738,7 @@ def _m_step(self, X, log_resp): self._estimate_means(nk, xk) self._estimate_precisions(nk, xk, sk) - def _estimate_log_weights(self): + def _estimate_log_weights(self, xp=None): if self.weight_concentration_prior_type == "dirichlet_process": digamma_sum = digamma( self.weight_concentration_[0] + self.weight_concentration_[1] @@ -756,7 +756,7 @@ def _estimate_log_weights(self): np.sum(self.weight_concentration_) ) - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): _, n_features = X.shape # We remove `n_features * np.log(self.degrees_of_freedom_)` because # the precision matrix is normalized diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 4614c4072e3c1..0ba9db226864d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -151,7 +151,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): # Gaussian mixture parameters estimators (used by the M-Step) -def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): """Estimate the full covariance matrices. Parameters @@ -171,7 +171,7 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. 
""" - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) n_components, n_features = means.shape covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) for k in range(n_components): @@ -182,7 +182,7 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar): return covariances -def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): """Estimate the tied covariance matrix. Parameters @@ -202,6 +202,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. """ + # TODO still using np here ... avg_X2 = np.dot(X.T, X) avg_means2 = np.dot(nk * means.T, means) covariance = avg_X2 - avg_means2 @@ -210,7 +211,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar): return covariance -def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar, xp=None): """Estimate the diagonal covariance vectors. Parameters @@ -230,13 +231,13 @@ def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar): covariances : array, shape (n_components, n_features) The covariance vector of the current components. """ - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) avg_X2 = (resp.T @ (X * X)) / nk[:, xp.newaxis] avg_means2 = means**2 return avg_X2 - avg_means2 + reg_covar -def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar): +def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar, xp=None): """Estimate the spherical variance values. Parameters @@ -256,10 +257,12 @@ def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar): variances : array, shape (n_components,) The variance values of each components. """ - return _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar).mean(1) + return _estimate_gaussian_covariances_diag( + resp, X, nk, means, reg_covar, xp=xp + ).mean(1) -def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): +def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): """Estimate the Gaussian distribution parameters. Parameters @@ -288,7 +291,7 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): The covariance matrix of the current components. The shape depends of the covariance_type. """ - xp, _ = get_namespace(X) + xp, _ = get_namespace(X, xp=xp) nk = xp.sum(resp, axis=0) + 10 * xp.finfo(resp.dtype).eps means = (resp.T @ X) / nk[:, xp.newaxis] covariances = { @@ -296,11 +299,11 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type): "tied": _estimate_gaussian_covariances_tied, "diag": _estimate_gaussian_covariances_diag, "spherical": _estimate_gaussian_covariances_spherical, - }[covariance_type](resp, X, nk, means, reg_covar) + }[covariance_type](resp, X, nk, means, reg_covar, xp=xp) return nk, means, covariances -def _compute_precision_cholesky(covariances, covariance_type): +def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. Parameters @@ -318,7 +321,7 @@ def _compute_precision_cholesky(covariances, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. 
""" - xp, _ = get_namespace(covariances) + xp, _ = get_namespace(covariances, xp=xp) estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " @@ -370,7 +373,7 @@ def _flipudlr(array): return np.flipud(np.fliplr(array)) -def _compute_precision_cholesky_from_precisions(precisions, covariance_type): +def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp=None): r"""Compute the Cholesky decomposition of precisions using precisions themselves. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is @@ -404,6 +407,7 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): The cholesky decomposition of sample precisions of the current components. The shape depends on the covariance_type. """ + # TODO still using np here ... if covariance_type == "full": precisions_cholesky = np.array( [ @@ -422,7 +426,7 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): ############################################################################### # Gaussian mixture probability estimators -def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): +def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None): """Compute the log-det of the cholesky decomposition of matrices. Parameters @@ -444,7 +448,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): log_det_precision_chol : array-like of shape (n_components,) The determinant of the precision matrix for each component. """ - xp, _ = get_namespace(matrix_chol) + xp, _ = get_namespace(matrix_chol, xp=xp) if covariance_type == "full": n_components, _, _ = matrix_chol.shape log_det_chol = xp.sum( @@ -463,7 +467,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): return log_det_chol -def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): +def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=None): """Estimate the log Gaussian probability. Parameters @@ -485,7 +489,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type): ------- log_prob : array, shape (n_samples, n_components) """ - xp, _, device = get_namespace_and_device(X, means, precisions_chol) + xp, _, device = get_namespace_and_device(X, means, precisions_chol, xp=xp) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -786,7 +790,7 @@ def _check_parameters(self, X): n_features, ) - def _initialize_parameters(self, X, random_state, xp): + def _initialize_parameters(self, X, random_state, xp=None): # If all the initial parameters are all provided, then there is no need to run # the initialization. compute_resp = ( @@ -795,11 +799,11 @@ def _initialize_parameters(self, X, random_state, xp): or self.precisions_init is None ) if compute_resp: - super()._initialize_parameters(X, random_state, xp) + super()._initialize_parameters(X, random_state, xp=xp) else: - self._initialize(X, None) + self._initialize(X, None, xp=xp) - def _initialize(self, X, resp): + def _initialize(self, X, resp, xp=None): """Initialization of the Gaussian mixture parameters. 
Parameters @@ -812,7 +816,7 @@ def _initialize(self, X, resp): weights, means, covariances = None, None, None if resp is not None: weights, means, covariances = _estimate_gaussian_parameters( - X, resp, self.reg_covar, self.covariance_type + X, resp, self.reg_covar, self.covariance_type, xp=xp ) if self.weights_init is None: weights /= n_samples @@ -823,14 +827,14 @@ def _initialize(self, X, resp): if self.precisions_init is None: self.covariances_ = covariances self.precisions_cholesky_ = _compute_precision_cholesky( - covariances, self.covariance_type + covariances, self.covariance_type, xp=xp ) else: self.precisions_cholesky_ = _compute_precision_cholesky_from_precisions( - self.precisions_init, self.covariance_type + self.precisions_init, self.covariance_type, xp=xp ) - def _m_step(self, X, log_resp): + def _m_step(self, X, log_resp, xp=None): """M step. Parameters @@ -841,22 +845,22 @@ def _m_step(self, X, log_resp): Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X. """ - xp, _ = get_namespace(X, log_resp) + xp, _ = get_namespace(X, log_resp, xp=xp) self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters( - X, xp.exp(log_resp), self.reg_covar, self.covariance_type + X, xp.exp(log_resp), self.reg_covar, self.covariance_type, xp=xp ) self.weights_ /= xp.sum(self.weights_) self.precisions_cholesky_ = _compute_precision_cholesky( - self.covariances_, self.covariance_type + self.covariances_, self.covariance_type, xp=xp ) - def _estimate_log_prob(self, X): + def _estimate_log_prob(self, X, xp=None): return _estimate_log_gaussian_prob( - X, self.means_, self.precisions_cholesky_, self.covariance_type + X, self.means_, self.precisions_cholesky_, self.covariance_type, xp=xp ) - def _estimate_log_weights(self): - xp, _ = get_namespace(self.weights_) + def _estimate_log_weights(self, xp=None): + xp, _ = get_namespace(self.weights_, xp=xp) return xp.log(self.weights_) def _compute_lower_bound(self, _, log_prob_norm): From 6dccb4702ce433d2c02bde969943c5b7c5b702ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 21 Mar 2025 16:30:04 +0100 Subject: [PATCH 30/92] tweak --- sklearn/mixture/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 55ba77a6ce997..aca4c1f082929 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -237,7 +237,7 @@ def fit_predict(self, X, y=None): self._print_verbose_msg_init_beg(init) if do_init: - self._initialize_parameters(X, random_state, xp) + self._initialize_parameters(X, random_state, xp=xp) lower_bound = -xp.inf if do_init else self.lower_bound_ From 30894cd579aa7c2175dc5d1f5aefa75521a1e032 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 26 Mar 2025 11:20:41 +0100 Subject: [PATCH 31/92] add NotImplementedError and test --- sklearn/mixture/_gaussian_mixture.py | 14 +++++++++++ .../mixture/tests/test_gaussian_mixture.py | 25 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 0ba9db226864d..0418504156db9 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -6,6 +6,7 @@ import numpy as np from scipy import linalg +from .._config import get_config from ..utils import check_array from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions @@ -790,6 +791,19 @@ def 
_check_parameters(self, X): n_features, ) + allowed_init_values = ["random", "random_from_data"] + if ( + get_config()["array_api_dispatch"] + and self.init_params not in allowed_init_values + ): + raise NotImplementedError( + f"Allowed `init_params` are {allowed_init_values} if " + f"'array_api_dispatch' is enabled. You passed " + f"init_params={self.init_params!r}, which are not implemented to work " + "with 'array_api_dispatch' enabled. Please disable " + f"'array_api_dispatch' to use init_params={self.init_params!r}." + ) + def _initialize_parameters(self, X, random_state, xp=None): # If all the initial parameters are all provided, then there is no need to run # the initialization. diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 24da5376331a3..283d51daeeb8e 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1510,3 +1510,28 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + + +# TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_raises_where_array_api_not_implemented( + init_params, array_namespace, device_, dtype +): + X, _ = make_blobs( + n_samples=int(1e3), + n_features=2, + centers=3, + ) + gmm = GaussianMixture( + n_components=3, covariance_type="diag", init_params=init_params + ) + + with sklearn.config_context(array_api_dispatch=True): + with pytest.raises( + NotImplementedError, + match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", + ): + gmm.fit(X) From ae06fe173578442a033ef2d4074156ac324911be Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 26 Mar 2025 11:55:17 +0100 Subject: [PATCH 32/92] add array api support for init_params='random_from_data' --- .../upcoming_changes/array-api/30777.feature.rst | 7 ++++--- sklearn/mixture/_base.py | 11 ++++++++--- sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 096b851ea11e5..ed985fcc77d29 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,3 +1,4 @@ -- :class:`sklearn.gaussian_mixture.GaussianMixture` with `initialization="random"`, - `covariance_type="diag"` and `warm_start=False` now supports Array API compatible - inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` +- :class:`sklearn.gaussian_mixture.GaussianMixture` with + `initialization="random/random_from_data"` and `covariance_type="diag"` and + `warm_start=False` now supports Array API compatible inputs. + By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index aca4c1f082929..8773b8d818ac3 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -106,6 +106,7 @@ def _initialize_parameters(self, X, random_state, xp=None): A random number generator instance that controls the random seed used for the method chosen to initialize the parameters. 
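
What the new guard means on the caller's side, sketched against this branch's behaviour and assuming array-api-compat is installed (on released scikit-learn the kmeans path simply runs):

import sklearn
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

X, _ = make_blobs(n_samples=100, centers=3, random_state=0)
gmm = GaussianMixture(n_components=3, init_params="kmeans")

with sklearn.config_context(array_api_dispatch=True):
    try:
        gmm.fit(X)
    except NotImplementedError as exc:
        # raised by _check_parameters while dispatch is enabled
        print(exc)
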
""" + xp, _, device = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape if self.init_params == "kmeans": @@ -119,7 +120,6 @@ def _initialize_parameters(self, X, random_state, xp=None): ) resp[xp.arange(n_samples), label] = 1 elif self.init_params == "random": - xp, _, device = get_namespace_and_device(X, xp=xp) resp = xp.asarray( random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype, @@ -127,11 +127,16 @@ def _initialize_parameters(self, X, random_state, xp=None): ) resp /= xp.sum(resp, axis=1)[:, xp.newaxis] elif self.init_params == "random_from_data": - resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) + resp = xp.zeros( + (n_samples, self.n_components), dtype=X.dtype, device=device + ) indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - resp[indices, xp.arange(self.n_components)] = 1 + # TODO: instead of for-loop, find something more efficient; previous code: + # resp[indices, xp.arange(self.n_components)] = 1 + for count, index in enumerate(indices): + resp[index, count] = 1 elif self.init_params == "k-means++": resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 283d51daeeb8e..4da93cce21f37 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1479,11 +1479,12 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( assert mock.call_count == gm.n_iter_ +@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - array_namespace, device_, dtype, global_random_seed + init_params, array_namespace, device_, dtype, global_random_seed ): X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed @@ -1492,7 +1493,7 @@ def test_gaussian_mixture_array_api_compliance( n_components=3, covariance_type="diag", random_state=global_random_seed, - init_params="random", + init_params=init_params, ) gmm.fit(X) From 3f2d92832947f40ec9931a22c176ba15d569dd0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 26 Mar 2025 12:14:01 +0100 Subject: [PATCH 33/92] Fix? 
---
 sklearn/mixture/tests/test_gaussian_mixture.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index 4da93cce21f37..2316840d8f213 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -1521,6 +1521,8 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented(
     init_params, array_namespace, device_, dtype
 ):
+    # TODO skips tests if dependencies are not installed
+    _array_api_for_tests(array_namespace, device=None)
     X, _ = make_blobs(
         n_samples=int(1e3),
         n_features=2,

From 6be6aa234f970bef8b988ea31437fba701eb1910 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 10:46:05 +0100
Subject: [PATCH 34/92] Add a logsumexp test without nans or +inf

---
 sklearn/utils/tests/test_array_api.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 84850ed0832eb..14366941f6928 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -596,8 +596,8 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
         [
             [0, 3, 1000],
             [2, -1, 1000],
-            [numpy.inf, 0, 0],
-            [numpy.nan, 8, -numpy.inf],
+            [-10, 0, 0],
+            [-50, 8, -numpy.inf],
             [4, 0, 5],
         ],
         dtype=dtype_name,
@@ -610,3 +610,23 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
     res_xp = _convert_to_numpy(res_xp, xp)
     assert_array_equal(res_np, res_xp)
+
+    # Test with NaNs and np.inf
+    array_np_2 = numpy.asarray(
+        [
+            [0, numpy.nan, 1000],
+            [2, -1, 1000],
+            [numpy.inf, 0, 0],
+            [-50, 8, -numpy.inf],
+            [4, 0, 5],
+        ],
+        dtype=dtype_name,
+    )
+    array_xp_2 = xp.asarray(array_np_2, device=device_)
+
+    res_np_2 = scipy.special.logsumexp(array_np_2, axis=axis)
+
+    with config_context(array_api_dispatch=True):
+        res_xp_2 = _logsumexp(array_xp_2, axis=axis)
+    res_xp_2 = _convert_to_numpy(res_xp_2, xp)
+    assert_array_equal(res_np_2, res_xp_2)

From 805742b84a475121ee1c1eeaa947f04ce2e33ea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 10:46:35 +0100
Subject: [PATCH 35/92] tweak

---
 sklearn/utils/tests/test_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 14366941f6928..f732f54d411ec 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -611,7 +611,7 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
     res_xp = _convert_to_numpy(res_xp, xp)
     assert_array_equal(res_np, res_xp)

-    # Test with NaNs and np.inf
+    # Test with NaNs and +np.inf
     array_np_2 = numpy.asarray(
         [
             [0, numpy.nan, 1000],

From 90bf491f5f9cc91129bf9c3ef97214eab8bd0397 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 27 Mar 2025 11:31:49 +0100
Subject: [PATCH 36/92] Add test for logsumexp on default device with array API dispatch disabled

---
 sklearn/utils/_array_api.py           |  1 +
 sklearn/utils/tests/test_array_api.py | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 8f0c492677c29..f3db2a7f6522a 100644
--- a/sklearn/utils/_array_api.py
+++
b/sklearn/utils/_array_api.py @@ -998,6 +998,7 @@ def _logsumexp(array, axis=None, xp=None): supported_dtypes = supported_float_dtypes(xp) if array.dtype not in supported_dtypes: array = xp.asarray(array, dtype=supported_dtypes[0]) + array_max = xp.max(array, axis=axis, keepdims=True) index_max = array == array_max diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index f732f54d411ec..66b47c18b55d5 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -606,10 +606,20 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax res_np = scipy.special.logsumexp(array_np, axis=axis) + rtol = 1e-6 if "float32" in str(dtype_name) else 1e-12 + + # if torch on CPU or array api strict on default device + # check that _logsumexp works when array API dispatch is disabled + # TODO is there a better way for this + if (array_namespace == "torch" and device_ == "cpu") or ( + array_namespace == "array_api_strict" and "CPU" in str(device_) + ): + assert_allclose(_logsumexp(array_xp, axis=axis), res_np, rtol=rtol) + with config_context(array_api_dispatch=True): res_xp = _logsumexp(array_xp, axis=axis) res_xp = _convert_to_numpy(res_xp, xp) - assert_array_equal(res_np, res_xp) + assert_allclose(res_np, res_xp, rtol=rtol) # Test with NaNs and +np.inf array_np_2 = numpy.asarray( @@ -629,4 +639,4 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax with config_context(array_api_dispatch=True): res_xp_2 = _logsumexp(array_xp_2, axis=axis) res_xp_2 = _convert_to_numpy(res_xp_2, xp) - assert_array_equal(res_np_2, res_xp_2) + assert_allclose(res_np_2, res_xp_2, rtol=rtol) From b07b1713dd3a7b86d677702447f5f12d3bc049f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 27 Mar 2025 11:45:33 +0100 Subject: [PATCH 37/92] Cleaner way to skip when array API dispatch is disabled --- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 2316840d8f213..3ea2e8d07c685 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -41,6 +41,7 @@ assert_almost_equal, assert_array_almost_equal, assert_array_equal, + skip_if_array_api_compat_not_configured, ) from sklearn.utils.extmath import fast_logdet @@ -1514,6 +1515,7 @@ def test_gaussian_mixture_array_api_compliance( # TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() @@ -1521,8 +1523,6 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype ): - # TODO skips tests if dependencies are not installed - _array_api_for_tests(array_namespace, device=None) X, _ = make_blobs( n_samples=int(1e3), n_features=2, From baf6982329937c0649cd30a0fc83a719b0f32b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 27 Mar 2025 11:45:58 +0100 Subject: [PATCH 38/92] [azure parallel] From 339c16bd9cf0d8213e25a064d6dc33440fbc8d80 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 2 Apr 2025 12:08:38 +0200 Subject: [PATCH 39/92] add support for 
weights_init --- sklearn/mixture/_base.py | 4 +- sklearn/mixture/_gaussian_mixture.py | 39 ++++++---- .../mixture/tests/test_gaussian_mixture.py | 71 ++++++++++++++++++- 3 files changed, 96 insertions(+), 18 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 5373500cb65c5..59e9e4240637b 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -31,7 +31,7 @@ def _check_shape(param, param_shape, name): name : str """ - param = np.array(param) + # param = xp.array(param) if param.shape != param_shape: raise ValueError( "The parameter '%s' should have the shape of %s, but got %s" @@ -226,7 +226,7 @@ def fit_predict(self, X, y=None): f"but got n_components = {self.n_components}, " f"n_samples = {X.shape[0]}" ) - self._check_parameters(X) + self._check_parameters(X, xp=xp) # if we enable warm_start, we will have a unique initialisation do_init = not (self.warm_start and hasattr(self, "converged_")) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 0157c766ecc19..4d19718955d00 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -7,6 +7,7 @@ from scipy import linalg from .._config import get_config +from ..externals import array_api_extra as xpx from ..utils import check_array from ..utils._array_api import get_namespace, get_namespace_and_device from ..utils._param_validation import StrOptions @@ -17,7 +18,7 @@ # Gaussian mixture shape checkers used by the GaussianMixture class -def _check_weights(weights, n_components): +def _check_weights(weights, n_components, xp=None): """Check the user provided 'weights'. Parameters @@ -32,23 +33,23 @@ def _check_weights(weights, n_components): ------- weights : array, shape (n_components,) """ - weights = check_array(weights, dtype=[np.float64, np.float32], ensure_2d=False) + weights = check_array(weights, dtype=[xp.float64, xp.float32], ensure_2d=False) _check_shape(weights, (n_components,), "weights") # check range - if any(np.less(weights, 0.0)) or any(np.greater(weights, 1.0)): + if any(xp.less(weights, 0.0)) or any(xp.greater(weights, 1.0)): raise ValueError( "The parameter 'weights' should be in the range " "[0, 1], but got max value %.5f, min value %.5f" - % (np.min(weights), np.max(weights)) + % (xp.min(weights), xp.max(weights)) ) # check normalization - atol = 1e-6 if weights.dtype == np.float32 else 1e-8 - if not np.allclose(np.abs(1.0 - np.sum(weights)), 0.0, atol=atol): + atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 + if not xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" - % np.sum(weights) + % xp.sum(weights) ) return weights @@ -342,14 +343,15 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) for k, covariance in enumerate(covariances): try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, - # maybe separate branches for array API and numpy? + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe + # separate branches for array API and numpy? cov_chol = xp.linalg.cholesky(covariance) except xp.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? + # probably separate branches for array API and numpy? 
maybe + # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant precisions_chol[k] = xp.linalg.solve( cov_chol, xp.eye(n_features, dtype=dtype) ).T @@ -775,12 +777,14 @@ def __init__( self.means_init = means_init self.precisions_init = precisions_init - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check the Gaussian mixture parameters are well defined.""" _, n_features = X.shape if self.weights_init is not None: - self.weights_init = _check_weights(self.weights_init, self.n_components) + self.weights_init = _check_weights( + self.weights_init, self.n_components, xp=xp + ) if self.means_init is not None: self.means_init = _check_means( @@ -795,13 +799,13 @@ def _check_parameters(self, X): n_features, ) - allowed_init_values = ["random", "random_from_data"] + allowed_init_params = ["random", "random_from_data"] if ( get_config()["array_api_dispatch"] - and self.init_params not in allowed_init_values + and self.init_params not in allowed_init_params ): raise NotImplementedError( - f"Allowed `init_params` are {allowed_init_values} if " + f"Allowed `init_params` are {allowed_init_params} if " f"'array_api_dispatch' is enabled. You passed " f"init_params={self.init_params!r}, which are not implemented to work " "with 'array_api_dispatch' enabled. Please disable " @@ -830,6 +834,9 @@ def _initialize(self, X, resp, xp=None): resp : array-like of shape (n_samples, n_components) """ + # TODO: check if device_ should be computed in fit_predict and passed down the + # call chain + xp, _, device_ = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape weights, means, covariances = None, None, None if resp is not None: @@ -840,6 +847,8 @@ def _initialize(self, X, resp, xp=None): weights /= n_samples self.weights_ = weights if self.weights_init is None else self.weights_init + self.weights_ = xp.asarray(self.weights_, device=device_) + self.means_ = means if self.means_init is None else self.means_init if self.precisions_init is None: diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 361344e6d363f..6d8e886c7af4f 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -9,6 +9,7 @@ from io import StringIO from unittest.mock import Mock +import array_api_strict import numpy as np import pytest from scipy import linalg, stats @@ -1515,7 +1516,32 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) -# TODO: remove when gmm works with `init_params` are `kmeans` or `k-means++` +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_with_weights_init( + array_namespace, device_, dtype, global_random_seed +): + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=xp.asarray([0.1, 0.4, 0.5]), + ) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + +# TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( @@ -1539,3 +1565,46 @@ def 
test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_different_namespaces( + array_namespace, device_, dtype, global_random_seed +): + """Test that array api works if `X` and `weights_init` come from different array + namespaces.""" + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + # check with weights_init being a numpy array + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=np.asarray([0.1, 0.4, 0.5]), + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + # check with weights_init being an array_api_strict array + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) From cbc8811f624a3d46ec88d7533329449627a4f1b8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 2 Apr 2025 15:00:55 +0200 Subject: [PATCH 40/92] fix signature and add assert to test --- sklearn/mixture/_base.py | 2 +- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 59e9e4240637b..ce71136b6dcb1 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -86,7 +86,7 @@ def __init__( self.verbose_interval = verbose_interval @abstractmethod - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check initial parameters of the derived class. 
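
`device` is scikit-learn's `sklearn.utils._array_api.device` helper, which collapses the earlier per-backend branches into a single comparable token. The assertion pattern used by these tests, sketched on plain NumPy inputs:

import numpy as np
from sklearn.utils._array_api import device

X = np.ones((4, 2))
w = np.full(3, 1.0 / 3.0)

# one namespace-agnostic token per array; both sides agree for NumPy,
# and for torch/array_api_strict it reflects the actual placement
assert device(X) == device(w)
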
Parameters diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6d8e886c7af4f..5309fe595fe2a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1522,6 +1522,8 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_with_weights_init( array_namespace, device_, dtype, global_random_seed ): + """Check that passing `weights_init` during instantiation correctly converts to the + same namespace as X.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1540,6 +1542,8 @@ def test_gaussian_mixture_array_api_with_weights_init( with sklearn.config_context(array_api_dispatch=True): gmm.fit(X) + assert device(X) == device(gmm.weights_) + # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured From 614f7b51936c22475c6c641c24e35a7c7eecfd76 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 3 Apr 2025 09:56:30 +0200 Subject: [PATCH 41/92] some small things --- .../array-api/30777.feature.rst | 2 +- .../mixture/tests/test_gaussian_mixture.py | 85 +++++++++---------- 2 files changed, 41 insertions(+), 46 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index ed985fcc77d29..84a1b16855c84 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `initialization="random/random_from_data"` and `covariance_type="diag"` and + `init_params` "random" or "random_from_data" and `covariance_type="diag"` and `warm_start=False` now supports Array API compatible inputs. 
By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5309fe595fe2a..ee8451c94dc1c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1522,8 +1522,8 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_with_weights_init( array_namespace, device_, dtype, global_random_seed ): - """Check that passing `weights_init` during instantiation correctly converts to the - same namespace as X.""" + """Check that array api works with `weights_init`, which unlike other passed arrays + is an init param.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1545,6 +1545,44 @@ def test_gaussian_mixture_array_api_with_weights_init( assert device(X) == device(gmm.weights_) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_different_namespaces( + array_namespace, device_, dtype, global_random_seed +): + """Check that passing `weights_init` in a different namespace during instantiation + correctly converts to the same namespace as X.""" + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + """# check with weights_init being a numpy array + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=np.asarray([0.1, 0.4, 0.5]), + ) + gmm.fit(X)""" + + # check with weights_init being an array_api_strict array + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type="diag", + random_state=global_random_seed, + init_params="random", + weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), + ) + gmm.fit(X) + + # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @@ -1569,46 +1607,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_different_namespaces( - array_namespace, device_, dtype, global_random_seed -): - """Test that array api works if `X` and `weights_init` come from different array - namespaces.""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - - # check with weights_init being a numpy array - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=np.asarray([0.1, 0.4, 0.5]), - ) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - # check with weights_init being an array_api_strict array - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), - ) - - xp = 
_array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) From 90baf84c5f0886cc4a102bdf0d4b123b452c9c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 10:13:55 +0200 Subject: [PATCH 42/92] Fix BayesianGaussianMixture --- sklearn/mixture/_bayesian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 83e889984241b..6858e45e1972b 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -410,7 +410,7 @@ def __init__( self.degrees_of_freedom_prior = degrees_of_freedom_prior self.covariance_prior = covariance_prior - def _check_parameters(self, X): + def _check_parameters(self, X, xp=None): """Check that the parameters are well defined. Parameters From 1e7a3856f68adef7cb2b6b3d531b4b3a705650bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 10:18:57 +0200 Subject: [PATCH 43/92] Add comment --- sklearn/mixture/_bayesian_mixture.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 6858e45e1972b..2a62f159b1df6 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -417,6 +417,9 @@ def _check_parameters(self, X, xp=None): ---------- X : array-like of shape (n_samples, n_features) """ + # TODO should we pass xp to the check functions in other words + # should we test BayesianGaussianMixture array API support? + # Maybe we should leave it for a further PR self._check_weights_parameters() self._check_means_parameters(X) self._check_precision_parameters(X) From e4618cff62b6586077e44e6b6ab3b9c149c4a52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 12:05:14 +0200 Subject: [PATCH 44/92] Remove all remaining code using np and make most tests pass --- sklearn/mixture/_base.py | 20 ++-- sklearn/mixture/_gaussian_mixture.py | 96 ++++++++------- .../mixture/tests/test_gaussian_mixture.py | 113 ++++++++++++------ 3 files changed, 143 insertions(+), 86 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index ce71136b6dcb1..05dca67346ae4 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -290,7 +290,7 @@ def fit_predict(self, X, y=None): ConvergenceWarning, ) - self._set_parameters(best_params) + self._set_parameters(best_params, xp=xp) self.n_iter_ = best_n_iter self.lower_bound_ = max_lower_bound self.lower_bounds_ = best_lower_bounds @@ -437,6 +437,9 @@ def sample(self, n_samples=1): Component labels. """ check_is_fitted(self) + # TODO what is a cleaner way to do this, should we have a self.xp_? 
+ # TODO we probably want to use the device as well + xp, _, device = get_namespace(self.means_) if n_samples < 1: raise ValueError( @@ -449,7 +452,7 @@ def sample(self, n_samples=1): n_samples_comp = rng.multinomial(n_samples, self.weights_) if self.covariance_type == "full": - X = np.vstack( + X = xp.concat( [ rng.multivariate_normal(mean, covariance, int(sample)) for (mean, covariance, sample) in zip( @@ -458,26 +461,26 @@ def sample(self, n_samples=1): ] ) elif self.covariance_type == "tied": - X = np.vstack( + X = xp.concat( [ rng.multivariate_normal(mean, self.covariances_, int(sample)) for (mean, sample) in zip(self.means_, n_samples_comp) ] ) else: - X = np.vstack( + X = xp.concat( [ mean + rng.standard_normal(size=(sample, n_features)) - * np.sqrt(covariance) + * xp.sqrt(covariance) for (mean, covariance, sample) in zip( self.means_, self.covariances_, n_samples_comp ) ] ) - y = np.concatenate( - [np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] + y = xp.concat( + [xp.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] ) return (X, y) @@ -544,6 +547,9 @@ def _estimate_log_prob_resp(self, X, xp=None): weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp) log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp) + # TODO np.errstate not in the array API spec, decide what to do here + # maybe something like this + # context_manager = np.errstate(under="ignore") if xp is np else nullcontext with np.errstate(under="ignore"): # ignore underflow log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis] diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 4d19718955d00..1e89bc5f3dbab 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -2,8 +2,8 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause +import math -import numpy as np from scipy import linalg from .._config import get_config @@ -54,7 +54,7 @@ def _check_weights(weights, n_components, xp=None): return weights -def _check_means(means, n_components, n_features): +def _check_means(means, n_components, n_features, xp=None): """Validate the provided 'means'. 
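
Two loose ends in the hunks above: `xp, _, device = get_namespace(self.means_)` unpacks three values from a helper that is used as a two-tuple everywhere else in this series, so `get_namespace_and_device` is presumably intended there; and the `np.errstate` TODO. The TODO's own suggestion, spelled out as a runnable sketch (the name `underflow_guard` is made up):

import contextlib
import numpy as np

def underflow_guard(xp):
    # silence underflow only when the namespace really is NumPy;
    # any other namespace gets a no-op context manager
    return np.errstate(under="ignore") if xp is np else contextlib.nullcontext()

with underflow_guard(np):
    print(np.exp(np.asarray(-1000.0)))  # 0.0, without an underflow warning
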
Parameters @@ -72,34 +72,39 @@ def _check_means(means, n_components, n_features): ------- means : array, (n_components, n_features) """ - means = check_array(means, dtype=[np.float64, np.float32], ensure_2d=False) + xp, _ = get_namespace(means, xp=xp) + means = check_array(means, dtype=[xp.float64, xp.float32], ensure_2d=False) _check_shape(means, (n_components, n_features), "means") return means -def _check_precision_positivity(precision, covariance_type): +def _check_precision_positivity(precision, covariance_type, xp=None): """Check a precision vector is positive-definite.""" - if np.any(np.less_equal(precision, 0.0)): + xp, _ = get_namespace(precision, xp=xp) + if xp.any(xp.less_equal(precision, 0.0)): raise ValueError("'%s precision' should be positive" % covariance_type) -def _check_precision_matrix(precision, covariance_type): +def _check_precision_matrix(precision, covariance_type, xp=None): """Check a precision matrix is symmetric and positive-definite.""" + xp, _ = get_namespace(precision, xp=xp) if not ( - np.allclose(precision, precision.T) and np.all(linalg.eigvalsh(precision) > 0.0) + xpx.isclose(precision, precision.T) + and xp.all(xp.linalg.eigvalsh(precision) > 0.0) ): raise ValueError( "'%s precision' should be symmetric, positive-definite" % covariance_type ) -def _check_precisions_full(precisions, covariance_type): +def _check_precisions_full(precisions, covariance_type, xp=None): """Check the precision matrices are symmetric and positive-definite.""" + xp, _ = get_namespace(precisions, xp=xp) for prec in precisions: - _check_precision_matrix(prec, covariance_type) + _check_precision_matrix(prec, covariance_type, xp=xp) -def _check_precisions(precisions, covariance_type, n_components, n_features): +def _check_precisions(precisions, covariance_type, n_components, n_features, xp=None): """Validate user provided precisions. Parameters @@ -122,9 +127,10 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): ------- precisions : array """ + xp, _ = get_namespace(precisions, xp=xp) precisions = check_array( precisions, - dtype=[np.float64, np.float32], + dtype=[xp.float64, xp.float32], ensure_2d=False, allow_nd=covariance_type == "full", ) @@ -145,7 +151,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): "diag": _check_precision_positivity, "spherical": _check_precision_positivity, } - _check_precisions[covariance_type](precisions, covariance_type) + _check_precisions[covariance_type](precisions, covariance_type, xp=xp) return precisions @@ -204,12 +210,11 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. """ - # TODO still using np here ... - avg_X2 = np.dot(X.T, X) - avg_means2 = np.dot(nk * means.T, means) + avg_X2 = X.T @ X + avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 - covariance /= nk.sum() - covariance.flat[:: len(covariance) + 1] += reg_covar + covariance /= xp.sum(nk) + covariance[:, 0] += reg_covar return covariance @@ -323,7 +328,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): The cholesky decomposition of sample precisions of the current components. The shape depends of the covariance_type. 
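
One detail of the `tied` rewrite above: the removed `covariance.flat[:: len(covariance) + 1] += reg_covar` adds `reg_covar` to the diagonal, while the replacement `covariance[:, 0] += reg_covar` updates the first column, so a diagonal-targeted update is presumably intended (for example the `_fill_or_add_to_diagonal` helper already imported in `test_array_api.py` earlier in the series, or an identity-matrix add). The identity-matrix version, checked in NumPy:

import numpy as np

n_features, reg_covar = 3, 1e-6
covariance = np.zeros((n_features, n_features))

# the .flat stride trick, and an array-API-friendly equivalent
expected = covariance.copy()
expected.flat[:: n_features + 1] += reg_covar

covariance = covariance + reg_covar * np.eye(n_features, dtype=covariance.dtype)
assert np.allclose(covariance, expected)
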
""" - xp, _ = get_namespace(covariances, xp=xp) + xp, _, device_ = get_namespace_and_device(covariances, xp=xp) estimate_precision_error_message = ( "Fitting the mixture model failed because some components have " @@ -358,11 +363,16 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): elif covariance_type == "tied": _, n_features = covariances.shape try: - cov_chol = linalg.cholesky(covariances, lower=True) + # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe + # separate branches for array API and numpy? + cov_chol = xp.linalg.cholesky(covariances) except linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol = linalg.solve_triangular( - cov_chol, xp.eye(n_features, dtype=dtype), lower=True + # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular + # probably separate branches for array API and numpy? maybe + # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant + precisions_chol = xp.linalg.solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_) ).T else: if xp.any(covariances <= 0.0): @@ -371,9 +381,10 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): return precisions_chol -def _flipudlr(array): +def _flipudlr(array, xp=None): """Reverse the rows and columns of an array.""" - return np.flipud(np.fliplr(array)) + xp, _ = get_namespace(array, xp=xp) + return xp.flip(xp.flip(array, axis=1), axis=0) def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp=None): @@ -410,20 +421,19 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= The cholesky decomposition of sample precisions of the current components. The shape depends on the covariance_type. """ - # TODO still using np here ... 
if covariance_type == "full": - precisions_cholesky = np.array( + precisions_cholesky = xp.asarray( [ - _flipudlr(linalg.cholesky(_flipudlr(precision), lower=True)) + _flipudlr(xp.linalg.cholesky(_flipudlr(precision, xp=xp)), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - linalg.cholesky(_flipudlr(precisions), lower=True) + xp.linalg.cholesky(_flipudlr(precisions, xp=xp)), xp=xp ) else: - precisions_cholesky = np.sqrt(precisions) + precisions_cholesky = xp.sqrt(precisions) return precisions_cholesky @@ -459,7 +469,7 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None) ) elif covariance_type == "tied": - log_det_chol = xp.sum(xp.log(xp.diagonal(matrix_chol))) + log_det_chol = xp.sum(xp.log(xp.linalg.diagonal(matrix_chol))) elif covariance_type == "diag": log_det_chol = xp.sum(xp.log(matrix_chol), axis=1) @@ -492,7 +502,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ------- log_prob : array, shape (n_samples, n_components) """ - xp, _, device = get_namespace_and_device(X, means, precisions_chol, xp=xp) + xp, _, device_ = get_namespace_and_device(X, means, precisions_chol, xp=xp) n_samples, n_features = X.shape n_components, _ = means.shape # The determinant of the precision matrix from the Cholesky decomposition @@ -502,14 +512,15 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features) if covariance_type == "full": - log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): y = (X @ prec_chol) - (mu @ prec_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) elif covariance_type == "tied": - log_prob = xp.empty((n_samples, n_components), dtype=X.dtype) - for k, mu in enumerate(means): + log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) + for k in range(means.shape[0]): + mu = means[k, :] y = (X @ precisions_chol) - (mu @ precisions_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) @@ -533,7 +544,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N return ( -0.5 * ( - n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device)) + n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device_)) + log_prob ) + log_det @@ -788,7 +799,7 @@ def _check_parameters(self, X, xp=None): if self.means_init is not None: self.means_init = _check_means( - self.means_init, self.n_components, n_features + self.means_init, self.n_components, n_features, xp=xp ) if self.precisions_init is not None: @@ -797,6 +808,7 @@ def _check_parameters(self, X, xp=None): self.covariance_type, self.n_components, n_features, + xp=xp, ) allowed_init_params = ["random", "random_from_data"] @@ -901,7 +913,8 @@ def _get_parameters(self): self.precisions_cholesky_, ) - def _set_parameters(self, params): + def _set_parameters(self, params, xp=None): + xp, _, device_ = get_namespace_and_device(params, xp=xp) ( self.weights_, self.means_, @@ -914,14 +927,13 @@ def _set_parameters(self, params): dtype = self.precisions_cholesky_.dtype if self.covariance_type == "full": - self.precisions_ = np.empty_like(self.precisions_cholesky_) + self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_) for k, prec_chol in enumerate(self.precisions_cholesky_): - 
self.precisions_[k] = np.dot(prec_chol, prec_chol.T) + self.precisions_[k] = prec_chol @ prec_chol.T elif self.covariance_type == "tied": - self.precisions_ = np.dot( - self.precisions_cholesky_, self.precisions_cholesky_.T - ) + self.precisions_ = self.precisions_cholesky_ @ self.precisions_cholesky_.T + else: self.precisions_ = self.precisions_cholesky_**2 @@ -958,7 +970,7 @@ def bic(self, X): bic : float The lower the better. """ - return -2 * self.score(X) * X.shape[0] + self._n_parameters() * np.log( + return -2 * self.score(X) * X.shape[0] + self._n_parameters() * math.log( X.shape[0] ) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ee8451c94dc1c..1226295335b65 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -9,7 +9,6 @@ from io import StringIO from unittest.mock import Mock -import array_api_strict import numpy as np import pytest from scipy import linalg, stats @@ -1515,6 +1514,9 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + # TODO Maybe we should test the sample method + # TODO test means_init and precisions_init + @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() @@ -1545,42 +1547,45 @@ def test_gaussian_mixture_array_api_with_weights_init( assert device(X) == device(gmm.weights_) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_different_namespaces( - array_namespace, device_, dtype, global_random_seed -): - """Check that passing `weights_init` in a different namespace during instantiation - correctly converts to the same namespace as X.""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - """# check with weights_init being a numpy array - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=np.asarray([0.1, 0.4, 0.5]), - ) - gmm.fit(X)""" - - # check with weights_init being an array_api_strict array - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), - ) - gmm.fit(X) +# TODO What is the expected behavior when weights init +# and X are not in the same namespace/device? +# It feels like check_array would need a xp argument? 
+# @pytest.mark.parametrize( +# "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +# ) +# def test_gaussian_mixture_array_api_different_namespaces( +# array_namespace, device_, dtype, global_random_seed +# ): +# """Check that passing `weights_init` in a different namespace during instantiation +# correctly converts to the same namespace as X.""" +# X, _ = make_blobs( +# n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed +# ) + +# xp = _array_api_for_tests(array_namespace, device_) +# X = xp.asarray(X, device=device_) + +# # check with weights_init being a numpy array +# with sklearn.config_context(array_api_dispatch=True): +# gmm = GaussianMixture( +# n_components=3, +# covariance_type="diag", +# random_state=global_random_seed, +# init_params="random", +# weights_init=np.asarray([0.1, 0.4, 0.5]), +# ) +# gmm.fit(X) + +# # check with weights_init being an array_api_strict array +# with sklearn.config_context(array_api_dispatch=True): +# gmm = GaussianMixture( +# n_components=3, +# covariance_type="diag", +# random_state=global_random_seed, +# init_params="random", +# weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), +# ) +# gmm.fit(X) # TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @@ -1607,3 +1612,37 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_array_api_compliance_covariance_type_tied( + init_params, array_namespace, device_, dtype, global_random_seed +): + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + gmm = GaussianMixture( + n_components=3, + covariance_type="tied", + random_state=global_random_seed, + init_params=init_params, + ) + + gmm.fit(X) + means_ = gmm.means_ + covariances_ = gmm.covariances_ + + xp = _array_api_for_tests(array_namespace, device_) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm.fit(X) + + assert device(X) == device(gmm.means_) + assert device(X) == device(gmm.covariances_) + + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) + assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 2b80ac921e6c5ed81a8ec5ea310a0c0c97c7912e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 14:03:07 +0200 Subject: [PATCH 45/92] Fix easy failures --- sklearn/mixture/_base.py | 2 +- sklearn/mixture/_bayesian_mixture.py | 2 +- sklearn/mixture/_gaussian_mixture.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 05dca67346ae4..43c883de32f64 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -439,7 +439,7 @@ def sample(self, n_samples=1): check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
# TODO we probably want to use the device as well - xp, _, device = get_namespace(self.means_) + xp, _ = get_namespace(self.means_) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 2a62f159b1df6..9a991f19d3d40 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -850,7 +850,7 @@ def _get_parameters(self): self.precisions_cholesky_, ) - def _set_parameters(self, params): + def _set_parameters(self, params, xp=None): ( self.weight_concentration_, self.mean_precision_, diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 1e89bc5f3dbab..35bbaadb55d42 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -46,7 +46,7 @@ def _check_weights(weights, n_components, xp=None): # check normalization atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 - if not xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp): + if not xp.all(xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp)): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" % xp.sum(weights) @@ -89,7 +89,7 @@ def _check_precision_matrix(precision, covariance_type, xp=None): """Check a precision matrix is symmetric and positive-definite.""" xp, _ = get_namespace(precision, xp=xp) if not ( - xpx.isclose(precision, precision.T) + xp.all(xpx.isclose(precision, precision.T)) and xp.all(xp.linalg.eigvalsh(precision) > 0.0) ): raise ValueError( @@ -210,6 +210,7 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): covariance : array, shape (n_features, n_features) The tied covariance matrix of the components. 
""" + xp, _ = get_namespace(X, means, xp=xp) avg_X2 = X.T @ X avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 From 3287a5006082a6473dc356d605f52a60327141bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Apr 2025 14:27:58 +0200 Subject: [PATCH 46/92] Fix [azure parallel] --- sklearn/mixture/_gaussian_mixture.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 35bbaadb55d42..5b4ca4e646385 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -215,7 +215,8 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 covariance /= xp.sum(nk) - covariance[:, 0] += reg_covar + my_flat = xp.reshape(covariance, (-1,)) + my_flat[:: covariance.shape[0] + 1] += reg_covar return covariance From fb72f790d2af544df80cf1022144c7b16817f772 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 4 Apr 2025 14:08:46 +0200 Subject: [PATCH 47/92] array api support for covariance type 'full' + test --- .../array-api/30777.feature.rst | 4 +- sklearn/mixture/_gaussian_mixture.py | 35 ++++++++++------ .../mixture/tests/test_gaussian_mixture.py | 42 +++---------------- 3 files changed, 29 insertions(+), 52 deletions(-) diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index 84a1b16855c84..b3f0751fa0a0d 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `init_params` "random" or "random_from_data" and `covariance_type="diag"` and - `warm_start=False` now supports Array API compatible inputs. + `init_params` `"random"` or `"random_from_data"` and `warm_start=False` now supports + Array API compatible inputs. By :user:`Stefanie Senger ` and :user:`Loïc Estève ` diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 5b4ca4e646385..6a5194ce76f60 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -179,13 +179,15 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): covariances : array, shape (n_components, n_features, n_features) The covariance matrix of the current components. """ - xp, _ = get_namespace(X, xp=xp) + xp, _, device_ = get_namespace_and_device(X, xp=xp) n_components, n_features = means.shape - covariances = xp.empty((n_components, n_features, n_features), dtype=X.dtype) + covariances = xp.empty( + (n_components, n_features, n_features), device=device_, dtype=X.dtype + ) for k in range(n_components): - diff = X - means[k] - covariances[k] = ((resp[:, k] * diff.T) @ diff) / nk[k] - my_flat = xp.reshape(covariances[k], (-1,)) + diff = X - means[k, ...] + covariances[k, ...] 
= ((resp[:, k] * diff.T) @ diff) / nk[k] + my_flat = xp.reshape(covariances[k, ...], (-1,)) my_flat[:: n_features + 1] += reg_covar return covariances @@ -347,8 +349,11 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): if covariance_type == "full": n_components, n_features, _ = covariances.shape - precisions_chol = xp.empty((n_components, n_features, n_features), dtype=dtype) - for k, covariance in enumerate(covariances): + precisions_chol = xp.empty( + (n_components, n_features, n_features), device=device_, dtype=dtype + ) + for k in range(covariances.shape[0]): + covariance = covariances[k, ...] try: # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe # separate branches for array API and numpy? @@ -359,8 +364,8 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular # probably separate branches for array API and numpy? maybe # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol[k] = xp.linalg.solve( - cov_chol, xp.eye(n_features, dtype=dtype) + precisions_chol[k, ...] = xp.linalg.solve( + cov_chol, xp.eye(n_features, device=device_, dtype=dtype) ).T elif covariance_type == "tied": _, n_features = covariances.shape @@ -467,7 +472,8 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features, xp=None) if covariance_type == "full": n_components, _, _ = matrix_chol.shape log_det_chol = xp.sum( - xp.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1 + xp.log(xp.reshape(matrix_chol, (n_components, -1))[:, :: n_features + 1]), + axis=1, ) elif covariance_type == "tied": @@ -515,7 +521,9 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N if covariance_type == "full": log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_) - for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): + for k in range(means.shape[0]): + mu = means[k, ...] + prec_chol = precisions_chol[k, ...] y = (X @ prec_chol) - (mu @ prec_chol) log_prob[:, k] = xp.sum(xp.square(y), axis=1) @@ -930,8 +938,9 @@ def _set_parameters(self, params, xp=None): dtype = self.precisions_cholesky_.dtype if self.covariance_type == "full": self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_) - for k, prec_chol in enumerate(self.precisions_cholesky_): - self.precisions_[k] = prec_chol @ prec_chol.T + for k in range(self.precisions_cholesky_.shape[0]): + prec_chol = self.precisions_cholesky_[k, ...] + self.precisions_[k, ...] 
= prec_chol @ prec_chol.T elif self.covariance_type == "tied": self.precisions_ = self.precisions_cholesky_ @ self.precisions_cholesky_.T diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 1226295335b65..a222addbc0ace 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1482,18 +1482,20 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) def test_gaussian_mixture_array_api_compliance( - init_params, array_namespace, device_, dtype, global_random_seed + init_params, covariance_type, array_namespace, device_, dtype, global_random_seed ): + """Test that array api works in GaussianMixtrue.fit.""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) gmm = GaussianMixture( n_components=3, - covariance_type="diag", + covariance_type=covariance_type, random_state=global_random_seed, init_params=init_params, ) @@ -1521,7 +1523,7 @@ def test_gaussian_mixture_array_api_compliance( @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) -def test_gaussian_mixture_array_api_with_weights_init( +def test_gaussian_mixture_array_api_compliance_with_weights_init( array_namespace, device_, dtype, global_random_seed ): """Check that array api works with `weights_init`, which unlike other passed arrays @@ -1612,37 +1614,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize("init_params", ["random", "random_from_data"]) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_array_api_compliance_covariance_type_tied( - init_params, array_namespace, device_, dtype, global_random_seed -): - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - gmm = GaussianMixture( - n_components=3, - covariance_type="tied", - random_state=global_random_seed, - init_params=init_params, - ) - - gmm.fit(X) - means_ = gmm.means_ - covariances_ = gmm.covariances_ - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.means_) - assert device(X) == device(gmm.covariances_) - - assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 964199700266de5d9626e36622c2ac52b492b035 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 12:27:01 +0200 Subject: [PATCH 48/92] fix support for covariance_type='spherical' --- sklearn/mixture/_gaussian_mixture.py | 12 +++++++----- sklearn/mixture/tests/test_gaussian_mixture.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 6a5194ce76f60..116e5db61414d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -268,9 +268,11 @@ def _estimate_gaussian_covariances_spherical(resp, X, 
nk, means, reg_covar, xp=N variances : array, shape (n_components,) The variance values of each components. """ - return _estimate_gaussian_covariances_diag( - resp, X, nk, means, reg_covar, xp=xp - ).mean(1) + xp, _ = get_namespace(X) + return xp.mean( + _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar, xp=xp), + axis=1, + ) def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): @@ -545,9 +547,9 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N elif covariance_type == "spherical": precisions = precisions_chol**2 log_prob = ( - xp.sum(means**2, 1) * precisions + xp.sum(means**2, axis=1) * precisions - 2 * (X @ means.T * precisions) - + xp.outer(row_norms(X, squared=True), precisions) + + xp.linalg.outer(row_norms(X, squared=True), precisions) ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index a222addbc0ace..ecd2c3cd3cbf1 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1482,7 +1482,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) -@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag", "spherical"]) @pytest.mark.parametrize( "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() ) From 35a464409c7c24836cc33376875592e3d1c9186c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 14:09:13 +0200 Subject: [PATCH 49/92] add test for GaussianMixture.sample() --- sklearn/mixture/_base.py | 4 +-- .../mixture/tests/test_gaussian_mixture.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 43c883de32f64..16691cd1ea403 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -31,7 +31,6 @@ def _check_shape(param, param_shape, name): name : str """ - # param = xp.array(param) if param.shape != param_shape: raise ValueError( "The parameter '%s' should have the shape of %s, but got %s" @@ -438,8 +437,7 @@ def sample(self, n_samples=1): """ check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
- # TODO we probably want to use the device as well - xp, _ = get_namespace(self.means_) + xp, _, device_ = get_namespace_and_device(self.means_) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ecd2c3cd3cbf1..6f51b5242205a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -33,6 +33,7 @@ from sklearn.utils._array_api import ( _convert_to_numpy, device, + get_namespace, yield_namespace_device_dtype_combinations, ) from sklearn.utils._testing import ( @@ -1489,7 +1490,7 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( def test_gaussian_mixture_array_api_compliance( init_params, covariance_type, array_namespace, device_, dtype, global_random_seed ): - """Test that array api works in GaussianMixtrue.fit.""" + """Test that array api works in GaussianMixture.fit().""" X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed ) @@ -1516,7 +1517,6 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) - # TODO Maybe we should test the sample method # TODO test means_init and precisions_init @@ -1614,3 +1614,34 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) + + +@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) +@pytest.mark.parametrize( + "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() +) +def test_gaussian_mixture_sample_array_api_compliance( + covariance_type, array_namespace, device_, dtype, global_random_seed +): + """Test that array api works in GaussianMixture.sample().""" + xp = _array_api_for_tests(array_namespace, device_) + X, _ = make_blobs( + n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + ) + X = xp.asarray(X, device=device_) + + with sklearn.config_context(array_api_dispatch=True): + gmm = GaussianMixture( + n_components=3, + covariance_type=covariance_type, + random_state=global_random_seed, + init_params="random", + ) + gmm.fit(X) + X_sample, y_sample = gmm.sample() + + assert get_namespace(X_sample)[0] == xp + assert get_namespace(y_sample)[0] == xp + + assert device(X_sample) == device(X) + assert device(y_sample) == device(X) From 502d3e680738b2e2084d212c57d2bc799219bba9 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 15:20:24 +0200 Subject: [PATCH 50/92] fix array api support in sample() with covariance_type='full' --- sklearn/mixture/_base.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 16691cd1ea403..09ff25329218d 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -452,10 +452,15 @@ def sample(self, n_samples=1): if self.covariance_type == "full": X = xp.concat( [ - rng.multivariate_normal(mean, covariance, int(sample)) - for (mean, covariance, sample) in zip( - self.means_, self.covariances_, n_samples_comp + xp.asarray( + rng.multivariate_normal( + self.means_[i, ...], + self.covariances_[i, ...], + int(n_samples_comp[i]), + ) ) + for i in range(len(n_samples_comp)) + if n_samples_comp[i] > 0 ] ) elif self.covariance_type == "tied": @@ -478,7 +483,10 @@ def sample(self, n_samples=1): ) y = 
xp.concat( - [xp.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)] + [ + xp.full(sample, j, dtype=xp.int32) + for j, sample in enumerate(n_samples_comp) + ] ) return (X, y) From 148381dd6f99d4e466f8152ffdd3f195d4768fb4 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 15:35:03 +0200 Subject: [PATCH 51/92] fix array api support in sample() with other covariance_types for array_api_strict namespace --- sklearn/mixture/_base.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 09ff25329218d..3f95bf56d33d2 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -460,25 +460,30 @@ def sample(self, n_samples=1): ) ) for i in range(len(n_samples_comp)) - if n_samples_comp[i] > 0 ] ) elif self.covariance_type == "tied": X = xp.concat( [ - rng.multivariate_normal(mean, self.covariances_, int(sample)) - for (mean, sample) in zip(self.means_, n_samples_comp) + xp.asarray( + rng.multivariate_normal( + self.means_[i, ...], + self.covariances_, + int(n_samples_comp[i]), + ) + ) + for i in range(len(n_samples_comp)) ] ) else: X = xp.concat( [ - mean - + rng.standard_normal(size=(sample, n_features)) - * xp.sqrt(covariance) - for (mean, covariance, sample) in zip( - self.means_, self.covariances_, n_samples_comp + self.means_[i, ...] + + xp.asarray( + rng.standard_normal(size=(n_samples_comp[i, ...], n_features)) ) + * xp.sqrt(self.covariances_[i, ...]) + for i in range(len(n_samples_comp)) ] ) From d565cf90048d041a2f0d12da22d1ea8b7b465f59 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 7 Apr 2025 16:06:08 +0200 Subject: [PATCH 52/92] fix torch dtype issue in xp.full --- sklearn/mixture/_base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 3f95bf56d33d2..5a871602db1f6 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -437,7 +437,7 @@ def sample(self, n_samples=1): """ check_is_fitted(self) # TODO what is a cleaner way to do this, should we have a self.xp_? 
-        xp, _, device_ = get_namespace_and_device(self.means_)
+        xp, _ = get_namespace(self.means_)
 
         if n_samples < 1:
             raise ValueError(
@@ -487,11 +487,17 @@ def sample(self, n_samples=1):
             ]
         )
 
-        y = xp.concat(
+        """y = xp.concat(
             [
-                xp.full(sample, j, dtype=xp.int32)
+                xp.full(int(sample), j, dtype=xp.int32)
                 for j, sample in enumerate(n_samples_comp)
             ]
+        )"""
+        y = xp.concat(
+            [
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32)
+                for i in range(len(n_samples_comp))
+            ]
         )
 
         return (X, y)

From c836e8dc1801adc96a72336a505ad76b17fda2df Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 9 Apr 2025 11:23:25 +0200
Subject: [PATCH 53/92] use numpy for random generation in sample

---
 sklearn/mixture/_base.py | 65 ++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 5a871602db1f6..f15c0547e4ec4 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -15,7 +15,12 @@
 from ..cluster import kmeans_plusplus
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
-from ..utils._array_api import _logsumexp, get_namespace, get_namespace_and_device
+from ..utils._array_api import (
+    _convert_to_numpy,
+    _logsumexp,
+    get_namespace,
+    get_namespace_and_device,
+)
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -437,7 +442,7 @@ def sample(self, n_samples=1):
         """
         check_is_fitted(self)
         # TODO what is a cleaner way to do this, should we have a self.xp_?
-        xp, _ = get_namespace(self.means_)
+        xp, _, device_ = get_namespace_and_device(self.means_)
 
         if n_samples < 1:
             raise ValueError(
@@ -447,60 +452,54 @@ def sample(self, n_samples=1):
 
         _, n_features = self.means_.shape
         rng = check_random_state(self.random_state)
-        n_samples_comp = rng.multinomial(n_samples, self.weights_)
+        n_samples_comp = rng.multinomial(
+            n_samples, _convert_to_numpy(self.weights_, xp)
+        )
 
         if self.covariance_type == "full":
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    xp.asarray(
-                        rng.multivariate_normal(
-                            self.means_[i, ...],
-                            self.covariances_[i, ...],
-                            int(n_samples_comp[i]),
-                        )
+                    rng.multivariate_normal(mean, covariance, int(sample))
+                    for (mean, covariance, sample) in zip(
+                        _convert_to_numpy(self.means_, xp),
+                        _convert_to_numpy(self.covariances_, xp),
+                        n_samples_comp,
                     )
-                    for i in range(len(n_samples_comp))
                 ]
             )
         elif self.covariance_type == "tied":
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    xp.asarray(
-                        rng.multivariate_normal(
-                            self.means_[i, ...],
-                            self.covariances_,
-                            int(n_samples_comp[i]),
-                        )
+                    rng.multivariate_normal(
+                        mean, _convert_to_numpy(self.covariances_, xp), int(sample)
+                    )
+                    for (mean, sample) in zip(
+                        _convert_to_numpy(self.means_, xp), n_samples_comp
                     )
-                    for i in range(len(n_samples_comp))
                 ]
             )
         else:
-            X = xp.concat(
+            X = np.vstack(
                 [
-                    self.means_[i, ...]
-                    + xp.asarray(
-                        rng.standard_normal(size=(n_samples_comp[i, ...], n_features))
+                    mean
+                    + rng.standard_normal(size=(sample, n_features))
+                    * np.sqrt(covariance)
+                    for (mean, covariance, sample) in zip(
+                        _convert_to_numpy(self.means_, xp),
+                        _convert_to_numpy(self.covariances_, xp),
+                        n_samples_comp,
                     )
-                    * xp.sqrt(self.covariances_[i, ...])
-                    for i in range(len(n_samples_comp))
                 ]
             )
 
-        """y = xp.concat(
-            [
-                xp.full(int(sample), j, dtype=xp.int32)
-                for j, sample in enumerate(n_samples_comp)
-            ]
-        )"""
         y = xp.concat(
             [
-                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32)
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32, device=device_)
                 for i in range(len(n_samples_comp))
             ]
         )
 
-        return (X, y)
+        return xp.asarray(X, device=device_), y

From 668c1b0c11013c5c216ccb04cc732c25db49845f Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 9 Apr 2025 12:00:34 +0200
Subject: [PATCH 54/92] remove old comment

---
 sklearn/mixture/_base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index f15c0547e4ec4..1aad065008252 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -441,7 +441,6 @@ def sample(self, n_samples=1):
             Component labels.
         """
         check_is_fitted(self)
-        # TODO what is a cleaner way to do this, should we have a self.xp_?
         xp, _, device_ = get_namespace_and_device(self.means_)
 
         if n_samples < 1:

From 7fef10aa29f2c78dfeea8c17146aa9a808a09810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Wed, 9 Apr 2025 14:33:54 +0200
Subject: [PATCH 55/92] Only use np.errstate for numpy namespace

---
 sklearn/mixture/_base.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 1aad065008252..05ea1c8c74306 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -5,6 +5,7 @@
 
 import warnings
 from abc import ABCMeta, abstractmethod
+from contextlib import nullcontext
 from numbers import Integral, Real
 from time import time
 
@@ -17,6 +18,7 @@
 from ..utils import check_random_state
 from ..utils._array_api import (
     _convert_to_numpy,
+    _is_numpy_namespace,
     _logsumexp,
     get_namespace,
     get_namespace_and_device,
@@ -562,10 +564,11 @@ def _estimate_log_prob_resp(self, X, xp=None):
         weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp)
         log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp)
 
-        # TODO np.errstate not in the array API spec, decide what to do here
-        # maybe something like this
-        # context_manager = np.errstate(under="ignore") if xp is np else nullcontext
-        with np.errstate(under="ignore"):
+        # There is no errstate equivalent for warning/error management in array API
+        context_manager = (
+            np.errstate(under="ignore") if _is_numpy_namespace(xp) else nullcontext()
+        )
+        with context_manager:
             # ignore underflow
             log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
         return log_prob_norm, log_resp
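In isolation, the pattern the patch above settles on reduces to the minimal sketch below: suppress NumPy's underflow warnings only when the active namespace is NumPy, since the array API standard has no errstate equivalent. The `xp_is_numpy` flag stands in for scikit-learn's internal `_is_numpy_namespace` helper; this is an illustration under that assumption, not the patched code itself.

    from contextlib import nullcontext

    import numpy as np
    from scipy.special import logsumexp

    def log_responsibilities(weighted_log_prob, xp_is_numpy=True):
        # Only NumPy understands errstate; other namespaces get a no-op context.
        ctx = np.errstate(under="ignore") if xp_is_numpy else nullcontext()
        with ctx:
            log_prob_norm = logsumexp(weighted_log_prob, axis=1)
            return log_prob_norm, weighted_log_prob - log_prob_norm[:, None]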
From c9a355d868e9db1c4f31c8975e9d1730b074549b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Wed, 9 Apr 2025 15:22:40 +0200
Subject: [PATCH 56/92] Use int64 to be closer to previous code that was doing dtype=int

---
 sklearn/mixture/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 05ea1c8c74306..1ef1940b60a59 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -495,7 +495,7 @@ def sample(self, n_samples=1):
 
         y = xp.concat(
             [
-                xp.full(int(n_samples_comp[i]), i, dtype=xp.int32, device=device_)
+                xp.full(int(n_samples_comp[i]), i, dtype=xp.int64, device=device_)
                 for i in range(len(n_samples_comp))
             ]
         )

From a7121815a7c2b5cf31a405861f327b66706fa7db Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:20:51 +0200
Subject: [PATCH 57/92] colons instead of ellipsis

---
 sklearn/mixture/_gaussian_mixture.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 116e5db61414d..eb9a8ebf54c45 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -185,9 +185,9 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None):
         (n_components, n_features, n_features), device=device_, dtype=X.dtype
     )
     for k in range(n_components):
-        diff = X - means[k, ...]
-        covariances[k, ...] = ((resp[:, k] * diff.T) @ diff) / nk[k]
-        my_flat = xp.reshape(covariances[k, ...], (-1,))
+        diff = X - means[k, :]
+        covariances[k, :, :] = ((resp[:, k] * diff.T) @ diff) / nk[k]
+        my_flat = xp.reshape(covariances[k, :, :], (-1,))
         my_flat[:: n_features + 1] += reg_covar
     return covariances
 
@@ -355,7 +355,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None):
             (n_components, n_features, n_features), device=device_, dtype=dtype
         )
         for k in range(covariances.shape[0]):
-            covariance = covariances[k, ...]
+            covariance = covariances[k, :, :]
             try:
                 # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe
                 # separate branches for array API and numpy?
@@ -366,7 +366,7 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None):
                 # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular
                 # probably separate branches for array API and numpy? maybe
                 # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant
-                precisions_chol[k, ...] = xp.linalg.solve(
+                precisions_chol[k, :, :] = xp.linalg.solve(
                     cov_chol, xp.eye(n_features, device=device_, dtype=dtype)
                 ).T
     elif covariance_type == "tied":
@@ -524,8 +524,8 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N
     if covariance_type == "full":
         log_prob = xp.empty((n_samples, n_components), dtype=X.dtype, device=device_)
         for k in range(means.shape[0]):
-            mu = means[k, ...]
-            prec_chol = precisions_chol[k, ...]
+            mu = means[k, :]
+            prec_chol = precisions_chol[k, :, :]
             y = (X @ prec_chol) - (mu @ prec_chol)
             log_prob[:, k] = xp.sum(xp.square(y), axis=1)
 
@@ -941,8 +941,8 @@ def _set_parameters(self, params, xp=None):
         if self.covariance_type == "full":
             self.precisions_ = xp.empty_like(self.precisions_cholesky_, device=device_)
             for k in range(self.precisions_cholesky_.shape[0]):
-                prec_chol = self.precisions_cholesky_[k, ...]
-                self.precisions_[k, ...] = prec_chol @ prec_chol.T
+                prec_chol = self.precisions_cholesky_[k, :, :]
+                self.precisions_[k, :, :] = prec_chol @ prec_chol.T
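For a 3-D array the two spellings swapped by the patch above are equivalent; the explicit colons simply document the expected rank at the call site. A quick NumPy check:

    import numpy as np

    x = np.arange(24).reshape(2, 3, 4)
    # The ellipsis expands to "all remaining axes", so both select the same 2-D slice.
    assert np.array_equal(x[0, ...], x[0, :, :])
    assert x[0, :, :].shape == (3, 4)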
From 038632fc188eb67ce4118b54fb16cae813ab4d6b Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:28:51 +0200
Subject: [PATCH 58/92] revert changes in k-means initialisation

---
 sklearn/mixture/_base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 1ef1940b60a59..e43e1023b9a5f 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -116,7 +116,7 @@ def _initialize_parameters(self, X, random_state, xp=None):
         n_samples, _ = X.shape
 
         if self.init_params == "kmeans":
-            resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype)
+            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
             label = (
                 cluster.KMeans(
                     n_clusters=self.n_components, n_init=1, random_state=random_state
@@ -124,7 +124,7 @@ def _initialize_parameters(self, X, random_state, xp=None):
                 .fit(X)
                 .labels_
             )
-            resp[xp.arange(n_samples), label] = 1
+            resp[np.arange(n_samples), label] = 1
         elif self.init_params == "random":
             resp = xp.asarray(
                 random_state.uniform(size=(n_samples, self.n_components)),
@@ -144,13 +144,13 @@ def _initialize_parameters(self, X, random_state, xp=None):
             for count, index in enumerate(indices):
                 resp[index, count] = 1
         elif self.init_params == "k-means++":
-            resp = xp.zeros((n_samples, self.n_components), dtype=X.dtype)
+            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
             _, indices = kmeans_plusplus(
                 X,
                 self.n_components,
                 random_state=random_state,
             )
-            resp[indices, xp.arange(self.n_components)] = 1
+            resp[indices, np.arange(self.n_components)] = 1
 
         self._initialize(X, resp)

From 18b3fe0437cac0f3aa81e4a04deb2a0949e043b5 Mon Sep 17 00:00:00 2001
From: Stefanie Senger
Date: Wed, 7 May 2025 10:59:54 +0200
Subject: [PATCH 59/92] add smoke test for other methods

---
 sklearn/mixture/_base.py                       | 4 +++-
 sklearn/mixture/tests/test_gaussian_mixture.py | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index e43e1023b9a5f..19952b6c8224b 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -386,7 +386,9 @@ def score(self, X, y=None):
         log_likelihood : float
             Log-likelihood of `X` under the Gaussian mixture model.
         """
-        return self.score_samples(X).mean()
+        # check if X is on the same namespace as fitted attributes:
+        xp, _ = get_namespace(X, self.means_)
+        return xp.mean(self.score_samples(X))
 
     def predict(self, X):
         """Predict the labels for the data samples in X using trained model.
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6f51b5242205a..38be79124e43c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1514,6 +1514,13 @@ def test_gaussian_mixture_array_api_compliance( assert device(X) == device(gmm.means_) assert device(X) == device(gmm.covariances_) + # smoke test other methods + # TODO: maybe test with X on different namespace/device as training + gmm.score_samples(X) + gmm.score(X) + gmm.aic(X) + gmm.bic(X) + assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) From 8f00364e7e66d967d7789b45a2d4e0baaf9a0051 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 7 May 2025 12:39:36 +0200 Subject: [PATCH 60/92] add lacking check_is_fitted to BaseMixture.score --- sklearn/mixture/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 19952b6c8224b..7b4f98d04797c 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -386,6 +386,7 @@ def score(self, X, y=None): log_likelihood : float Log-likelihood of `X` under the Gaussian mixture model. """ + check_is_fitted(self) # check if X is on the same namespace as fitted attributes: xp, _ = get_namespace(X, self.means_) return xp.mean(self.score_samples(X)) From 3aaabf5967029e7cdf2c60cfd540236a2e38149e Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 9 May 2025 14:42:44 +0200 Subject: [PATCH 61/92] re-trigger CI From 0084640638dd8f949c079002f34294e8b6550eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 9 May 2025 17:11:52 +0200 Subject: [PATCH 62/92] Add torch import --- sklearn/externals/array_api_compat/torch/linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index e26198b9b562e..3ddf4d009248d 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -11,6 +11,7 @@ from ._aliases import _fix_promotion, sum from torch.linalg import * # noqa: F403 +import torch # torch.linalg doesn't define __all__ # from torch.linalg import __all__ as linalg_all From f9b2946db36902cffabac0c94dbb17cc0727e8ea Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 14 May 2025 11:40:42 +0200 Subject: [PATCH 63/92] different branch for numpy.linalg; only re-raise numpy error --- sklearn/mixture/_gaussian_mixture.py | 46 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8326d8f88bc7b..8dd502f1c9371 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -4,8 +4,11 @@ # SPDX-License-Identifier: BSD-3-Clause import math +import numpy as np from scipy import linalg +from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace + from .._config import get_config from ..externals import array_api_extra as xpx from ..utils import check_array @@ -316,6 +319,20 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): return nk, means, covariances +def _call_cholesky(covariance, xp): + if is_numpy_namespace(xp): + return linalg.cholesky(covariance, lower=True) + else: + return xp.linalg.cholesky(covariance) + + +def 
_call_solve(cov_chol, eye_matrix, xp): + if is_numpy_namespace(xp): + return linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + else: + return xp.linalg.solve(cov_chol, eye_matrix) + + def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. @@ -357,31 +374,22 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): for k in range(covariances.shape[0]): covariance = covariances[k, :, :] try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe - # separate branches for array API and numpy? - cov_chol = xp.linalg.cholesky(covariance) - except xp.linalg.LinAlgError: + cov_chol = _call_cholesky(covariance, xp) + # catch only numpy exceptions, b/c exceptions aren't part of array api spec + except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - - # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? maybe - # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol[k, :, :] = xp.linalg.solve( - cov_chol, xp.eye(n_features, device=device_, dtype=dtype) + precisions_chol[k, :, :] = _call_solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T elif covariance_type == "tied": _, n_features = covariances.shape try: - # TODO we are using xp.linalg instead of scipy.linalg.cholesky, maybe - # separate branches for array API and numpy? - cov_chol = xp.linalg.cholesky(covariances) - except linalg.LinAlgError: + cov_chol = _call_cholesky(covariances, xp) + # catch only numpy exceptions, since exceptions are not part of array api spec + except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - # TODO we are using xp.linalg.solve instead of scipy.linalg.solve_triangular - # probably separate branches for array API and numpy? 
maybe - # https://github.com/scikit-learn/scikit-learn/pull/29318 is relevant - precisions_chol = xp.linalg.solve( - cov_chol, xp.eye(n_features, dtype=dtype, device=device_) + precisions_chol = _call_solve( + cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T else: if xp.any(covariances <= 0.0): From adc992e1530da523e0d15df3d33642693b40157e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 14 May 2025 11:43:23 +0200 Subject: [PATCH 64/92] Remove comment --- sklearn/mixture/_gaussian_mixture.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 8dd502f1c9371..076723cc6808d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -869,8 +869,6 @@ def _initialize(self, X, resp, xp=None): resp : array-like of shape (n_samples, n_components) """ - # TODO: check if device_ should be computed in fit_predict and passed down the - # call chain xp, _, device_ = get_namespace_and_device(X, xp=xp) n_samples, _ = X.shape weights, means, covariances = None, None, None From 0bb750cb4e828eadec24c4c1319456d98e04e8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 14:54:14 +0200 Subject: [PATCH 65/92] Remove script --- gmm-array-api.py | 72 ------------------------------------------------ 1 file changed, 72 deletions(-) delete mode 100644 gmm-array-api.py diff --git a/gmm-array-api.py b/gmm-array-api.py deleted file mode 100644 index f0da95a8aca9e..0000000000000 --- a/gmm-array-api.py +++ /dev/null @@ -1,72 +0,0 @@ -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -# %% - -import os - -import array_api_strict -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np - -import sklearn -from sklearn.datasets import make_blobs -from sklearn.mixture import GaussianMixture - -os.environ["SCIPY_ARRAY_API"] = "1" - -X, y = make_blobs(n_samples=int(1e3), n_features=2, centers=3, random_state=0) -# X, y = torch.asarray(X), torch.asarray(y) -X, y = array_api_strict.asarray(X), array_api_strict.asarray(y) - -sklearn.set_config(array_api_dispatch=True) - -gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=0, - init_params="random", - tol=1e-5, - max_iter=1000, -).fit(X) -print(gmm.means_) -print(gmm.covariances_) - -fig, ax = plt.subplots() - -X = np.asarray(X) -y = np.asarray(y) - -ax.scatter(X[:, 0], X[:, 1], c=y) - - -def make_ellipses(gmm, ax): - gmm.covariances_ = np.asarray(gmm.covariances_) - colors = ["navy", "turquoise", "darkorange"] - for n, color in enumerate(colors): - if gmm.covariance_type == "full": - covariances = gmm.covariances_[n][:2, :2] - elif gmm.covariance_type == "tied": - covariances = gmm.covariances_[:2, :2] - elif gmm.covariance_type == "diag": - covariances = np.diag(gmm.covariances_[n][:2]) - elif gmm.covariance_type == "spherical": - covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n] - v, w = np.linalg.eigh(covariances) - u = w[0] / np.linalg.norm(w[0]) - angle = np.arctan2(u[1], u[0]) - angle = 180 * angle / np.pi # convert to degrees - v = 2.0 * np.sqrt(2.0) * np.sqrt(v) - ell = mpl.patches.Ellipse( - gmm.means_[n, :2], v[0], v[1], angle=180 + angle, color=color - ) - ell.set_clip_box(ax.bbox) - ell.set_alpha(0.5) - ax.add_artist(ell) - ax.set_aspect("equal", "datalim") - - -make_ellipses(gmm, ax) - -# %% From 7874231810fb8de4956fd40adf7e9ac4c45f5800 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:10:48 +0200 Subject: [PATCH 66/92] update TODOs --- sklearn/externals/array_api_compat/torch/linalg.py | 4 ++++ sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index 3ddf4d009248d..768559bf8aa32 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -11,6 +11,10 @@ from ._aliases import _fix_promotion, sum from torch.linalg import * # noqa: F403 +# TODO Temporary work-around for +# https://github.com/data-apis/array-api-compat/issues/320. Remove when +# array-api-compat 1.12 is released and our vendored array-api-compat has been +# updated. import torch # torch.linalg doesn't define __all__ diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 38be79124e43c..73a209a5559ae 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1515,7 +1515,8 @@ def test_gaussian_mixture_array_api_compliance( assert device(X) == device(gmm.covariances_) # smoke test other methods - # TODO: maybe test with X on different namespace/device as training + # TODO compare with same method on numpy + # TODO add predict and predict_proba gmm.score_samples(X) gmm.score(X) gmm.aic(X) @@ -1597,7 +1598,6 @@ def test_gaussian_mixture_array_api_compliance_with_weights_init( # gmm.fit(X) -# TODO: remove when gmm works with `init_params` `kmeans` or `k-means++` @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( @@ -1631,6 +1631,7 @@ def test_gaussian_mixture_sample_array_api_compliance( covariance_type, array_namespace, device_, dtype, global_random_seed ): """Test that array api works in GaussianMixture.sample().""" + # TODO move this to test_gaussian_mixture_array_api_compliance function? xp = _array_api_for_tests(array_namespace, device_) X, _ = make_blobs( n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed From 96d8d8ccd2970c780454e2eb306bcc9b3d116e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:45:16 +0200 Subject: [PATCH 67/92] only use X array namespace at prediction time --- sklearn/mixture/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 7b4f98d04797c..aa4ce15058dba 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -387,8 +387,7 @@ def score(self, X, y=None): Log-likelihood of `X` under the Gaussian mixture model. """ check_is_fitted(self) - # check if X is on the same namespace as fitted attributes: - xp, _ = get_namespace(X, self.means_) + xp, _ = get_namespace(X) return xp.mean(self.score_samples(X)) def predict(self, X): From 27a8cd25de69b66e3efc067bb4bb94a40d1eb7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:45:56 +0200 Subject: [PATCH 68/92] Fix predict --- sklearn/mixture/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index aa4ce15058dba..d0fd91d8c07e2 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -405,8 +405,9 @@ def predict(self, X): Component labels. 
""" check_is_fitted(self) + xp, _ = get_namespace(X) X = validate_data(self, X, reset=False) - return self._estimate_weighted_log_prob(X).argmax(axis=1) + return xp.argmax(self._estimate_weighted_log_prob(X)) def predict_proba(self, X): """Evaluate the components' density for each sample. From 4c6271570f9acf821954f4a46eaf3c9ac0a5b9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 17:57:26 +0200 Subject: [PATCH 69/92] remove TODO --- sklearn/mixture/_bayesian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index d5eace5433be5..76589c8214a99 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -417,9 +417,6 @@ def _check_parameters(self, X, xp=None): ---------- X : array-like of shape (n_samples, n_features) """ - # TODO should we pass xp to the check functions in other words - # should we test BayesianGaussianMixture array API support? - # Maybe we should leave it for a further PR self._check_weights_parameters() self._check_means_parameters(X) self._check_precision_parameters(X) From 303f392fd0175c3b2c50dccfe23111e012e73a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 15 May 2025 21:20:57 +0200 Subject: [PATCH 70/92] Fix --- sklearn/mixture/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index d0fd91d8c07e2..30fd42ec50f10 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -407,7 +407,7 @@ def predict(self, X): check_is_fitted(self) xp, _ = get_namespace(X) X = validate_data(self, X, reset=False) - return xp.argmax(self._estimate_weighted_log_prob(X)) + return xp.argmax(self._estimate_weighted_log_prob(X), axis=1) def predict_proba(self, X): """Evaluate the components' density for each sample. 
From c232e39d0d08e039887194ede3d0d9f9dd2dcb1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 09:06:55 +0200 Subject: [PATCH 71/92] Better variable name --- sklearn/mixture/_gaussian_mixture.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 076723cc6808d..31e3b27fc1766 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -190,8 +190,8 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar, xp=None): for k in range(n_components): diff = X - means[k, :] covariances[k, :, :] = ((resp[:, k] * diff.T) @ diff) / nk[k] - my_flat = xp.reshape(covariances[k, :, :], (-1,)) - my_flat[:: n_features + 1] += reg_covar + covariances_flat = xp.reshape(covariances[k, :, :], (-1,)) + covariances_flat[:: n_features + 1] += reg_covar return covariances @@ -220,8 +220,8 @@ def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar, xp=None): avg_means2 = nk * means.T @ means covariance = avg_X2 - avg_means2 covariance /= xp.sum(nk) - my_flat = xp.reshape(covariance, (-1,)) - my_flat[:: covariance.shape[0] + 1] += reg_covar + covariance_flat = xp.reshape(covariance, (-1,)) + covariance_flat[:: covariance.shape[0] + 1] += reg_covar return covariance From a43eeb2863457a0200a1543e57f200e6b1cb2621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 09:27:27 +0200 Subject: [PATCH 72/92] Simplify with math.log --- sklearn/mixture/_gaussian_mixture.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 31e3b27fc1766..d193cd2955159 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -561,14 +561,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return ( - -0.5 - * ( - n_features * xp.log(xp.asarray(2 * xp.pi, dtype=X.dtype, device=device_)) - + log_prob - ) - + log_det - ) + return -0.5 * (n_features * math.log(2 * xp.pi) + log_prob) + log_det class GaussianMixture(BaseMixture): From 3a72ec90329cb4ae43b5c46ab4cc1997f77d4df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 14:14:39 +0200 Subject: [PATCH 73/92] Use math.pi --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index d193cd2955159..2c26312e124de 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -561,7 +561,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type, xp=N ) # Since we are using the precision of the Cholesky decomposition, # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol` - return -0.5 * (n_features * math.log(2 * xp.pi) + log_prob) + log_det + return -0.5 * (n_features * math.log(2 * math.pi) + log_prob) + log_det class GaussianMixture(BaseMixture): From 8f4079fa0332c3dc35fbbd19b173d96defa98ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 15:33:09 +0200 Subject: [PATCH 74/92] Improve tests + make score return float --- sklearn/mixture/_base.py | 6 +- 
.../mixture/tests/test_gaussian_mixture.py | 95 +++++++++---------- 2 files changed, 49 insertions(+), 52 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 30fd42ec50f10..929a4655fe688 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -139,8 +139,10 @@ def _initialize_parameters(self, X, random_state, xp=None): indices = random_state.choice( n_samples, size=self.n_components, replace=False ) - # TODO: instead of for-loop, find something more efficient; previous code: + # TODO: when array API supports __setitem__ with fancy indexing we + # can use the previous code: # resp[indices, xp.arange(self.n_components)] = 1 + # Until we use a for loop one on dimension. for count, index in enumerate(indices): resp[index, count] = 1 elif self.init_params == "k-means++": @@ -388,7 +390,7 @@ def score(self, X, y=None): """ check_is_fitted(self) xp, _ = get_namespace(X) - return xp.mean(self.score_samples(X)) + return float(xp.mean(self.score_samples(X))) def predict(self, X): """Predict the labels for the data samples in X using trained model. diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 73a209a5559ae..88651a982331a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1502,28 +1502,55 @@ def test_gaussian_mixture_array_api_compliance( ) gmm.fit(X) - means_ = gmm.means_ - covariances_ = gmm.covariances_ xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) + X_xp = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.means_) - assert device(X) == device(gmm.covariances_) - - # smoke test other methods - # TODO compare with same method on numpy - # TODO add predict and predict_proba - gmm.score_samples(X) - gmm.score(X) - gmm.aic(X) - gmm.bic(X) - - assert_allclose(means_, _convert_to_numpy(gmm.means_, xp=xp)) - assert_allclose(covariances_, _convert_to_numpy(gmm.covariances_, xp=xp)) + gmm_xp = sklearn.clone(gmm) + gmm_xp.fit(X_xp) + + assert get_namespace(gmm_xp.means_)[0] == xp + assert get_namespace(gmm_xp.covariances_)[0] == xp + assert device(gmm_xp.means_) == device(X_xp) + assert device(gmm_xp.covariances_) == device(X_xp) + + xp_predict = gmm_xp.predict(X_xp) + xp_predict_proba = gmm_xp.predict_proba(X_xp) + xp_score_samples = gmm_xp.score_samples(X_xp) + xp_score = gmm_xp.score(X_xp) + xp_aic = gmm_xp.aic(X_xp) + xp_bic = gmm_xp.bic(X_xp) + xp_sample_X, xp_sample_y = gmm_xp.sample(10) + + results = [ + xp_predict, + xp_predict_proba, + xp_score_samples, + xp_sample_X, + xp_sample_y, + ] + for result in results: + assert get_namespace(result)[0] == xp + assert device(result) == device(X_xp) + + for score in [xp_score, xp_aic, xp_bic]: + assert isinstance(score, float) + + # Check methods + assert_allclose(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) + assert_allclose(gmm.predict_proba(X), _convert_to_numpy(xp_predict_proba, xp=xp)) + assert_allclose(gmm.score_samples(X), _convert_to_numpy(xp_score_samples, xp=xp)) + assert_allclose(gmm.score(X), xp_score) + assert_allclose(gmm.aic(X), xp_aic) + assert_allclose(gmm.bic(X), xp_bic) + sample_X, sample_y = gmm.sample(10) + assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp)) + assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) + + # Check fitted attributes + assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, 
xp=xp)) + assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) # TODO test means_init and precisions_init @@ -1621,35 +1648,3 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( match="Allowed `init_params`.+if 'array_api_dispatch' is enabled", ): gmm.fit(X) - - -@pytest.mark.parametrize("covariance_type", ["full", "tied", "diag"]) -@pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -) -def test_gaussian_mixture_sample_array_api_compliance( - covariance_type, array_namespace, device_, dtype, global_random_seed -): - """Test that array api works in GaussianMixture.sample().""" - # TODO move this to test_gaussian_mixture_array_api_compliance function? - xp = _array_api_for_tests(array_namespace, device_) - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) - X = xp.asarray(X, device=device_) - - with sklearn.config_context(array_api_dispatch=True): - gmm = GaussianMixture( - n_components=3, - covariance_type=covariance_type, - random_state=global_random_seed, - init_params="random", - ) - gmm.fit(X) - X_sample, y_sample = gmm.sample() - - assert get_namespace(X_sample)[0] == xp - assert get_namespace(y_sample)[0] == xp - - assert device(X_sample) == device(X) - assert device(y_sample) == device(X) From de1e5750fae0ce9ed0674e721d56d2595b23e565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 16 May 2025 15:49:06 +0200 Subject: [PATCH 75/92] List GaussianMixture in the estimators supporting array API --- doc/modules/array_api.rst | 2 ++ doc/whats_new/upcoming_changes/array-api/30777.feature.rst | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index d24ce3573e7b6..4487a6a599790 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -117,6 +117,8 @@ Estimators - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` - :class:`preprocessing.Normalizer` +- :class:`mixture.GaussianMixture` (with `init_params="random"` or + `init_params="random_from_data"` and `warm_start=False`) Meta-estimators --------------- diff --git a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst index b3f0751fa0a0d..ab3510a72e6d3 100644 --- a/doc/whats_new/upcoming_changes/array-api/30777.feature.rst +++ b/doc/whats_new/upcoming_changes/array-api/30777.feature.rst @@ -1,4 +1,4 @@ - :class:`sklearn.gaussian_mixture.GaussianMixture` with - `init_params` `"random"` or `"random_from_data"` and `warm_start=False` now supports - Array API compatible inputs. + `init_params="random"` or `init_params="random_from_data"` and + `warm_start=False` now supports Array API compatible inputs. 
By :user:`Stefanie Senger ` and :user:`Loïc Estève ` From 910aa1f500ee67fa1b373a05cadc52042d2e6b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Tue, 20 May 2025 14:11:12 +0200 Subject: [PATCH 76/92] Remove temporary array-api-compat work-around --- sklearn/externals/array_api_compat/torch/linalg.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/externals/array_api_compat/torch/linalg.py b/sklearn/externals/array_api_compat/torch/linalg.py index 9f4a9ab4d69cc..70d7240500ce4 100644 --- a/sklearn/externals/array_api_compat/torch/linalg.py +++ b/sklearn/externals/array_api_compat/torch/linalg.py @@ -4,11 +4,6 @@ from typing import Optional, Union, Tuple from torch.linalg import * # noqa: F403 -# TODO Temporary work-around for -# https://github.com/data-apis/array-api-compat/issues/320. Remove when -# array-api-compat 1.12 is released and our vendored array-api-compat has been -# updated. -import torch # torch.linalg doesn't define __all__ # from torch.linalg import __all__ as linalg_all From 4fe376655027fd24dbd22b7590f760d05b909a41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 6 Jun 2025 11:30:23 +0200 Subject: [PATCH 77/92] lint --- sklearn/utils/tests/test_array_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 16bc41a9d39b3..a36ab3248be0d 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -636,8 +636,8 @@ def test_median(namespace, device, dtype_name, axis): assert get_namespace(result_xp)[0] == xp assert result_xp.device == X_xp.device assert_allclose(result_np, _convert_to_numpy(result_xp, xp=xp)) - - + + @pytest.mark.parametrize( "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations() ) From ce214a6cb6d4b6303599440c78fcdb6c64970bb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 14:18:06 +0200 Subject: [PATCH 78/92] Revert changes to test_bayesian_mixture.py --- sklearn/mixture/tests/test_bayesian_mixture.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 357c21dc702ba..d36543903cb87 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -12,7 +12,6 @@ from sklearn.mixture import BayesianGaussianMixture from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData -from sklearn.utils._array_api import get_namespace from sklearn.utils._testing import ( assert_almost_equal, assert_array_equal, @@ -260,7 +259,6 @@ def test_compare_covar_type(): rand_data = RandomData(rng, scale=7) X = rand_data.X["full"] n_components = rand_data.n_components - xp, _ = get_namespace(X) for prior_type in PRIOR_TYPE: # Computation of the full_covariance @@ -273,7 +271,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) full_covariances = ( bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis] ) @@ -288,7 +286,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + 
bgmm._initialize_parameters(X, np.random.RandomState(0)) tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(tied_covariance, np.mean(full_covariances, 0)) @@ -303,7 +301,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) diag_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis] assert_almost_equal( @@ -320,7 +318,7 @@ def test_compare_covar_type(): tol=1e-7, ) bgmm._check_parameters(X) - bgmm._initialize_parameters(X, np.random.RandomState(0), xp=xp) + bgmm._initialize_parameters(X, np.random.RandomState(0)) spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(spherical_covariances, np.mean(diag_covariances, 1)) From a69cd62d140b8e54a6c27de7cd82cf32efd80f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 14:34:12 +0200 Subject: [PATCH 79/92] Remove unnecessary check_is_fitted --- sklearn/mixture/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 929a4655fe688..776e3d4a79f3d 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -388,7 +388,6 @@ def score(self, X, y=None): log_likelihood : float Log-likelihood of `X` under the Gaussian mixture model. """ - check_is_fitted(self) xp, _ = get_namespace(X) return float(xp.mean(self.score_samples(X))) From 1a0e33be2b716deac67ab9b8e2df0090796534f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 16:39:24 +0200 Subject: [PATCH 80/92] Add all array constructor params to test --- .../mixture/tests/test_gaussian_mixture.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 88651a982331a..44d0d73bf5a0a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -32,6 +32,7 @@ ) from sklearn.utils._array_api import ( _convert_to_numpy, + _get_namespace_device_dtype_ids, device, get_namespace, yield_namespace_device_dtype_combinations, @@ -1485,7 +1486,9 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( @pytest.mark.parametrize("init_params", ["random", "random_from_data"]) @pytest.mark.parametrize("covariance_type", ["full", "tied", "diag", "spherical"]) @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) def test_gaussian_mixture_array_api_compliance( init_params, covariance_type, array_namespace, device_, dtype, global_random_seed @@ -1552,29 +1555,39 @@ def test_gaussian_mixture_array_api_compliance( assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) - # TODO test means_init and precisions_init - @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) -def test_gaussian_mixture_array_api_compliance_with_weights_init( +def 
test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( array_namespace, device_, dtype, global_random_seed ): """Check that array api works with `weights_init`, which unlike other passed arrays is an init param.""" + n_features = 2 + n_components = 3 X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed + n_samples=int(1e3), + n_features=n_features, + centers=3, + random_state=global_random_seed, ) + X = X.astype(dtype) xp = _array_api_for_tests(array_namespace, device_) X = xp.asarray(X, device=device_) + means_init = xp.zeros((n_components, n_features), device=device_, dtype=X.dtype) + precisions_init = xp.ones((n_components, n_features), device=device_, dtype=X.dtype) gmm = GaussianMixture( n_components=3, covariance_type="diag", random_state=global_random_seed, init_params="random", + means_init=means_init, + precisions_init=precisions_init, weights_init=xp.asarray([0.1, 0.4, 0.5]), ) @@ -1628,7 +1641,9 @@ def test_gaussian_mixture_array_api_compliance_with_weights_init( @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( - "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() + "array_namespace, device_, dtype", + yield_namespace_device_dtype_combinations(), + ids=_get_namespace_device_dtype_ids, ) def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype From 1dca29ac48fa9945f7d33b86e7dc7960fea86ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Jun 2025 16:59:58 +0200 Subject: [PATCH 81/92] [azure parallel] tweak docstring --- sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 44d0d73bf5a0a..e212572ecd68c 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1564,8 +1564,9 @@ def test_gaussian_mixture_array_api_compliance( def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( array_namespace, device_, dtype, global_random_seed ): - """Check that array api works with `weights_init`, which unlike other passed arrays - is an init param.""" + """Check that array api works with array-like constructors: 'means_init', + 'precisions_init' and 'weights_init' + """ n_features = 2 n_components = 3 X, _ = make_blobs( From b990682893dba158fd708fc03a76a626501833fd Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Sat, 14 Jun 2025 14:38:29 +0500 Subject: [PATCH 82/92] Update sklearn/utils/_array_api.py Co-authored-by: Olivier Grisel --- sklearn/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 9a3e0c01d4aaa..3a318ffd60e80 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1032,7 +1032,7 @@ def _tolist(array, xp=None): def _logsumexp(array, axis=None, xp=None): # TODO replace by scipy.special.logsumexp when - # https://github.com/scipy/scipy/pull/22683 is in a relase + # https://github.com/scipy/scipy/pull/22683 is part of a release. 
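# The underlying computation is the numerically stable shift-by-max identity:
# logsumexp(x, axis) == m + log(sum(exp(x - m), axis)) with m = max(x, axis),
# so that exp never overflows for large log-probabilities.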
# The following code is strongly inspired and simplified from # scipy.special._logsumexp.logsumexp xp, _, device = get_namespace_and_device(array, xp=xp) From 72cd185c07b52469c938e34b7f41756cc48353f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 14:25:56 +0200 Subject: [PATCH 83/92] Remove commented out test --- .../mixture/tests/test_gaussian_mixture.py | 41 ------------------- 1 file changed, 41 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e212572ecd68c..b7e2847710438 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1598,47 +1598,6 @@ def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_param assert device(X) == device(gmm.weights_) -# TODO What is the expected behavior when weights init -# and X are not in the same namespace/device? -# It feels like check_array would need a xp argument? -# @pytest.mark.parametrize( -# "array_namespace, device_, dtype", yield_namespace_device_dtype_combinations() -# ) -# def test_gaussian_mixture_array_api_different_namespaces( -# array_namespace, device_, dtype, global_random_seed -# ): -# """Check that passing `weights_init` in a different namespace during instantiation -# correctly converts to the same namespace as X.""" -# X, _ = make_blobs( -# n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed -# ) - -# xp = _array_api_for_tests(array_namespace, device_) -# X = xp.asarray(X, device=device_) - -# # check with weights_init being a numpy array -# with sklearn.config_context(array_api_dispatch=True): -# gmm = GaussianMixture( -# n_components=3, -# covariance_type="diag", -# random_state=global_random_seed, -# init_params="random", -# weights_init=np.asarray([0.1, 0.4, 0.5]), -# ) -# gmm.fit(X) - -# # check with weights_init being an array_api_strict array -# with sklearn.config_context(array_api_dispatch=True): -# gmm = GaussianMixture( -# n_components=3, -# covariance_type="diag", -# random_state=global_random_seed, -# init_params="random", -# weights_init=array_api_strict.asarray([0.1, 0.4, 0.5]), -# ) -# gmm.fit(X) - - @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) @pytest.mark.parametrize( From 3af1470e9f0bc00cf4c3949b952465193b030d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 14:36:40 +0200 Subject: [PATCH 84/92] Handle comments --- sklearn/mixture/_base.py | 6 +++--- sklearn/utils/tests/test_array_api.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 776e3d4a79f3d..a9627a0e74e7f 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -142,9 +142,9 @@ def _initialize_parameters(self, X, random_state, xp=None): # TODO: when array API supports __setitem__ with fancy indexing we # can use the previous code: # resp[indices, xp.arange(self.n_components)] = 1 - # Until we use a for loop one on dimension. - for count, index in enumerate(indices): - resp[index, count] = 1 + # Until then we use a for loop on one dimension. 
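+ # (Equivalently: resp[indices[0], 0] = 1, resp[indices[1], 1] = 1, and so
+ # on, so each drawn sample starts as the sole member of one component.)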
+ for col, index in enumerate(indices): + resp[index, col] = 1 elif self.init_params == "k-means++": resp = np.zeros((n_samples, self.n_components), dtype=X.dtype) _, indices = kmeans_plusplus( diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index a36ab3248be0d..5d35d86432f3c 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -662,7 +662,6 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax # if torch on CPU or array api strict on default device # check that _logsumexp works when array API dispatch is disabled - # TODO is there a better way for this if (array_namespace == "torch" and device_ == "cpu") or ( array_namespace == "array_api_strict" and "CPU" in str(device_) ): From ecac6103404c575297182c5e519e74235512d9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 16 Jun 2025 17:29:28 +0200 Subject: [PATCH 85/92] use _call_cholesky --- sklearn/mixture/_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 2c26312e124de..f48b1211660a0 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -441,13 +441,13 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= if covariance_type == "full": precisions_cholesky = xp.asarray( [ - _flipudlr(xp.linalg.cholesky(_flipudlr(precision, xp=xp)), xp=xp) + _flipudlr(_call_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - xp.linalg.cholesky(_flipudlr(precisions, xp=xp)), xp=xp + _call_cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp ) else: precisions_cholesky = xp.sqrt(precisions) From 341b659b1433bacb3c9704e4b6c2f517d03308fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 11:14:51 +0200 Subject: [PATCH 86/92] More explicit use of scipy.linalg --- sklearn/mixture/_gaussian_mixture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index f48b1211660a0..df9c50bfaa0bf 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,7 +5,7 @@ import math import numpy as np -from scipy import linalg +import scipy.linalg from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace @@ -321,14 +321,14 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): def _call_cholesky(covariance, xp): if is_numpy_namespace(xp): - return linalg.cholesky(covariance, lower=True) + return scipy.linalg.cholesky(covariance, lower=True) else: return xp.linalg.cholesky(covariance) def _call_solve(cov_chol, eye_matrix, xp): if is_numpy_namespace(xp): - return linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) else: return xp.linalg.solve(cov_chol, eye_matrix) From 7ffc5c7f5410ef39739c5f8693ff39384788a035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 12:51:26 +0200 Subject: [PATCH 87/92] [azure parallel] Increase rtol for float32 tests + some minor cleanups --- sklearn/mixture/_gaussian_mixture.py | 36 ++++-------- .../mixture/tests/test_gaussian_mixture.py | 55 +++++++++++++------ 
sklearn/utils/_array_api.py | 14 +++++ 3 files changed, 64 insertions(+), 41 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index df9c50bfaa0bf..05d42f68decbc 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,14 +5,16 @@ import math import numpy as np -import scipy.linalg - -from sklearn.externals.array_api_compat.common._helpers import is_numpy_namespace from .._config import get_config from ..externals import array_api_extra as xpx from ..utils import check_array -from ..utils._array_api import get_namespace, get_namespace_and_device +from ..utils._array_api import ( + _cholesky, + _linalg_solve, + get_namespace, + get_namespace_and_device, +) from ..utils._param_validation import StrOptions from ..utils.extmath import row_norms from ._base import BaseMixture, _check_shape @@ -319,20 +321,6 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type, xp=None): return nk, means, covariances -def _call_cholesky(covariance, xp): - if is_numpy_namespace(xp): - return scipy.linalg.cholesky(covariance, lower=True) - else: - return xp.linalg.cholesky(covariance) - - -def _call_solve(cov_chol, eye_matrix, xp): - if is_numpy_namespace(xp): - return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) - else: - return xp.linalg.solve(cov_chol, eye_matrix) - - def _compute_precision_cholesky(covariances, covariance_type, xp=None): """Compute the Cholesky decomposition of the precisions. @@ -374,21 +362,21 @@ def _compute_precision_cholesky(covariances, covariance_type, xp=None): for k in range(covariances.shape[0]): covariance = covariances[k, :, :] try: - cov_chol = _call_cholesky(covariance, xp) + cov_chol = _cholesky(covariance, xp) # catch only numpy exceptions, b/c exceptions aren't part of array api spec except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol[k, :, :] = _call_solve( + precisions_chol[k, :, :] = _linalg_solve( cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T elif covariance_type == "tied": _, n_features = covariances.shape try: - cov_chol = _call_cholesky(covariances, xp) + cov_chol = _cholesky(covariances, xp) # catch only numpy exceptions, since exceptions are not part of array api spec except np.linalg.LinAlgError: raise ValueError(estimate_precision_error_message) - precisions_chol = _call_solve( + precisions_chol = _linalg_solve( cov_chol, xp.eye(n_features, dtype=dtype, device=device_), xp ).T else: @@ -441,13 +429,13 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= if covariance_type == "full": precisions_cholesky = xp.asarray( [ - _flipudlr(_call_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) + _flipudlr(_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) for precision in precisions ] ) elif covariance_type == "tied": precisions_cholesky = _flipudlr( - _call_cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp + _cholesky(_flipudlr(precisions, xp=xp), xp=xp), xp=xp ) else: precisions_cholesky = xp.sqrt(precisions) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index b7e2847710438..fcbeaf81f1224 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1491,22 +1491,24 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( ids=_get_namespace_device_dtype_ids, ) def 
test_gaussian_mixture_array_api_compliance( - init_params, covariance_type, array_namespace, device_, dtype, global_random_seed + init_params, covariance_type, array_namespace, device_, dtype ): """Test that array api works in GaussianMixture.fit().""" - X, _ = make_blobs( - n_samples=int(1e3), n_features=2, centers=3, random_state=global_random_seed - ) + xp = _array_api_for_tests(array_namespace, device_) + + rng = np.random.RandomState(0) + rand_data = RandomData(rng) + X = rand_data.X[covariance_type] + X = X.astype(dtype) + gmm = GaussianMixture( - n_components=3, + n_components=rand_data.n_components, covariance_type=covariance_type, - random_state=global_random_seed, + random_state=0, init_params=init_params, ) - gmm.fit(X) - xp = _array_api_for_tests(array_namespace, device_) X_xp = xp.asarray(X, device=device_) with sklearn.config_context(array_api_dispatch=True): @@ -1541,19 +1543,38 @@ def test_gaussian_mixture_array_api_compliance( assert isinstance(score, float) # Check methods - assert_allclose(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) - assert_allclose(gmm.predict_proba(X), _convert_to_numpy(xp_predict_proba, xp=xp)) - assert_allclose(gmm.score_samples(X), _convert_to_numpy(xp_score_samples, xp=xp)) - assert_allclose(gmm.score(X), xp_score) - assert_allclose(gmm.aic(X), xp_aic) - assert_allclose(gmm.bic(X), xp_bic) + float32_rtol = 1e-4 if dtype == "float32" else 1e-7 + increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + + assert ( + adjusted_rand_score(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) > 0.95 + ) + assert_allclose( + gmm.predict_proba(X), + _convert_to_numpy(xp_predict_proba, xp=xp), + rtol=increased_rtol, + ) + assert_allclose( + gmm.score_samples(X), + _convert_to_numpy(xp_score_samples, xp=xp), + rtol=increased_rtol, + ) + # comparing Python floats so need explicit rtol + assert_allclose(gmm.score(X), xp_score, rtol=float32_rtol) + assert_allclose(gmm.aic(X), xp_aic, rtol=float32_rtol) + assert_allclose(gmm.bic(X), xp_bic, rtol=float32_rtol) sample_X, sample_y = gmm.sample(10) - assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp)) + # generated samples are float64 so need explicit rtol for dtype=float32 + assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp), rtol=float32_rtol) assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) - assert_allclose(gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp)) + assert_allclose( + gmm.covariances_, + _convert_to_numpy(gmm_xp.covariances_, xp=xp), + rtol=increased_rtol, + ) @pytest.mark.parametrize( @@ -1609,7 +1630,7 @@ def test_gaussian_mixture_raises_where_array_api_not_implemented( init_params, array_namespace, device_, dtype ): X, _ = make_blobs( - n_samples=int(1e3), + n_samples=100, n_features=2, centers=3, ) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 3a318ffd60e80..cbaaa9f5168a9 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1063,3 +1063,17 @@ def _logsumexp(array, axis=None, xp=None): out = out[()] if out.ndim == 0 else out return out + + +def _cholesky(covariance, xp): + if _is_numpy_namespace(xp): + return scipy.linalg.cholesky(covariance, lower=True) + else: + return xp.linalg.cholesky(covariance) + + +def _linalg_solve(cov_chol, eye_matrix, xp): + if _is_numpy_namespace(xp): + return scipy.linalg.solve_triangular(cov_chol, eye_matrix, lower=True) + else: + return 
xp.linalg.solve(cov_chol, eye_matrix) From 3b95a5f316f274e027179c588732261adfef0b74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 13:03:07 +0200 Subject: [PATCH 88/92] rename variables --- .../mixture/tests/test_gaussian_mixture.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index fcbeaf81f1224..74e2d9740d234 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1520,26 +1520,26 @@ def test_gaussian_mixture_array_api_compliance( assert device(gmm_xp.means_) == device(X_xp) assert device(gmm_xp.covariances_) == device(X_xp) - xp_predict = gmm_xp.predict(X_xp) - xp_predict_proba = gmm_xp.predict_proba(X_xp) - xp_score_samples = gmm_xp.score_samples(X_xp) - xp_score = gmm_xp.score(X_xp) - xp_aic = gmm_xp.aic(X_xp) - xp_bic = gmm_xp.bic(X_xp) - xp_sample_X, xp_sample_y = gmm_xp.sample(10) + predict_xp = gmm_xp.predict(X_xp) + predict_proba_xp = gmm_xp.predict_proba(X_xp) + score_samples_xp = gmm_xp.score_samples(X_xp) + score_xp = gmm_xp.score(X_xp) + aic_xp = gmm_xp.aic(X_xp) + bic_xp = gmm_xp.bic(X_xp) + sample_X_xp, sample_y_xp = gmm_xp.sample(10) results = [ - xp_predict, - xp_predict_proba, - xp_score_samples, - xp_sample_X, - xp_sample_y, + predict_xp, + predict_proba_xp, + score_samples_xp, + sample_X_xp, + sample_y_xp, ] for result in results: assert get_namespace(result)[0] == xp assert device(result) == device(X_xp) - for score in [xp_score, xp_aic, xp_bic]: + for score in [score_xp, aic_xp, bic_xp]: assert isinstance(score, float) # Check methods @@ -1547,26 +1547,26 @@ def test_gaussian_mixture_array_api_compliance( increased_rtol = 5e-4 if dtype == "float32" else 1e-7 assert ( - adjusted_rand_score(gmm.predict(X), _convert_to_numpy(xp_predict, xp=xp)) > 0.95 + adjusted_rand_score(gmm.predict(X), _convert_to_numpy(predict_xp, xp=xp)) > 0.95 ) assert_allclose( gmm.predict_proba(X), - _convert_to_numpy(xp_predict_proba, xp=xp), + _convert_to_numpy(predict_proba_xp, xp=xp), rtol=increased_rtol, ) assert_allclose( gmm.score_samples(X), - _convert_to_numpy(xp_score_samples, xp=xp), + _convert_to_numpy(score_samples_xp, xp=xp), rtol=increased_rtol, ) # comparing Python floats so need explicit rtol - assert_allclose(gmm.score(X), xp_score, rtol=float32_rtol) - assert_allclose(gmm.aic(X), xp_aic, rtol=float32_rtol) - assert_allclose(gmm.bic(X), xp_bic, rtol=float32_rtol) + assert_allclose(gmm.score(X), score_xp, rtol=float32_rtol) + assert_allclose(gmm.aic(X), aic_xp, rtol=float32_rtol) + assert_allclose(gmm.bic(X), bic_xp, rtol=float32_rtol) sample_X, sample_y = gmm.sample(10) # generated samples are float64 so need explicit rtol for dtype=float32 - assert_allclose(sample_X, _convert_to_numpy(xp_sample_X, xp=xp), rtol=float32_rtol) - assert_allclose(sample_y, _convert_to_numpy(xp_sample_y, xp=xp)) + assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=float32_rtol) + assert_allclose(sample_y, _convert_to_numpy(sample_y_xp, xp=xp)) # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) From 45ba1ee8326afff0e2904b08aadb64ee1cb0617f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 15:17:07 +0200 Subject: [PATCH 89/92] [azure parallel] test more precisely when array constructor arguments are passed in + fixes --- sklearn/mixture/_gaussian_mixture.py | 14 
++- .../mixture/tests/test_gaussian_mixture.py | 109 ++++++++---------- 2 files changed, 58 insertions(+), 65 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 05d42f68decbc..cd6523d1d2784 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -51,7 +51,7 @@ def _check_weights(weights, n_components, xp=None): # check normalization atol = 1e-6 if weights.dtype == xp.float32 else 1e-8 - if not xp.all(xpx.isclose(xp.abs(1.0 - xp.sum(weights)), 0.0, atol=atol, xp=xp)): + if not np.allclose(float(xp.abs(1.0 - xp.sum(weights))), 0.0, atol=atol): raise ValueError( "The parameter 'weights' should be normalized, but got sum(weights) = %.5f" % xp.sum(weights) @@ -105,8 +105,8 @@ def _check_precision_matrix(precision, covariance_type, xp=None): def _check_precisions_full(precisions, covariance_type, xp=None): """Check the precision matrices are symmetric and positive-definite.""" xp, _ = get_namespace(precisions, xp=xp) - for prec in precisions: - _check_precision_matrix(prec, covariance_type, xp=xp) + for i in range(precisions.shape[0]): + _check_precision_matrix(precisions[i, :, :], covariance_type, xp=xp) def _check_precisions(precisions, covariance_type, n_components, n_features, xp=None): @@ -427,10 +427,12 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type, xp= components. The shape depends on the covariance_type. """ if covariance_type == "full": - precisions_cholesky = xp.asarray( + precisions_cholesky = xp.stack( [ - _flipudlr(_cholesky(_flipudlr(precision, xp=xp), xp=xp), xp=xp) - for precision in precisions + _flipudlr( + _cholesky(_flipudlr(precisions[i, :, :], xp=xp), xp=xp), xp=xp + ) + for i in range(precisions.shape[0]) ] ) elif covariance_type == "tied": diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 74e2d9740d234..f03e86aa547dc 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1490,8 +1490,14 @@ def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters( yield_namespace_device_dtype_combinations(), ids=_get_namespace_device_dtype_ids, ) +@pytest.mark.parametrize("use_gmm_array_constructor_arguments", [False, True]) def test_gaussian_mixture_array_api_compliance( - init_params, covariance_type, array_namespace, device_, dtype + init_params, + covariance_type, + array_namespace, + device_, + dtype, + use_gmm_array_constructor_arguments, ): """Test that array api works in GaussianMixture.fit().""" xp = _array_api_for_tests(array_namespace, device_) @@ -1501,11 +1507,21 @@ def test_gaussian_mixture_array_api_compliance( X = rand_data.X[covariance_type] X = X.astype(dtype) + if use_gmm_array_constructor_arguments: + additional_kwargs = { + "means_init": rand_data.means.astype(dtype), + "precisions_init": rand_data.precisions[covariance_type].astype(dtype), + "weights_init": rand_data.weights.astype(dtype), + } + else: + additional_kwargs = {} + gmm = GaussianMixture( n_components=rand_data.n_components, covariance_type=covariance_type, random_state=0, init_params=init_params, + **additional_kwargs, ) gmm.fit(X) @@ -1513,6 +1529,13 @@ def test_gaussian_mixture_array_api_compliance( with sklearn.config_context(array_api_dispatch=True): gmm_xp = sklearn.clone(gmm) + for param_name, param_value in additional_kwargs.items(): + arg_xp = xp.asarray(param_value, device=device_) + setattr(gmm_xp, param_name, arg_xp) + + 
print(gmm.means_init) + print(gmm.precisions_init) + print(gmm.weights_init) gmm_xp.fit(X_xp) assert get_namespace(gmm_xp.means_)[0] == xp @@ -1542,10 +1565,27 @@ def test_gaussian_mixture_array_api_compliance( for score in [score_xp, aic_xp, bic_xp]: assert isinstance(score, float) - # Check methods - float32_rtol = 1e-4 if dtype == "float32" else 1e-7 + # Define specific rtol to make tests pass + default_rtol = 1e-4 if dtype == "float32" else 1e-7 + increased_atol = 1e-4 if dtype == "float32" else 0 increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + # Check fitted attributes + assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) + assert_allclose( + gmm.covariances_, + _convert_to_numpy(gmm_xp.covariances_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) + assert_allclose( + gmm.precisions_, + _convert_to_numpy(gmm_xp.precisions_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) + + # Check methods assert ( adjusted_rand_score(gmm.predict(X), _convert_to_numpy(predict_xp, xp=xp)) > 0.95 ) @@ -1553,71 +1593,22 @@ def test_gaussian_mixture_array_api_compliance( gmm.predict_proba(X), _convert_to_numpy(predict_proba_xp, xp=xp), rtol=increased_rtol, + atol=increased_atol, ) assert_allclose( gmm.score_samples(X), _convert_to_numpy(score_samples_xp, xp=xp), rtol=increased_rtol, ) - # comparing Python floats so need explicit rtol - assert_allclose(gmm.score(X), score_xp, rtol=float32_rtol) - assert_allclose(gmm.aic(X), aic_xp, rtol=float32_rtol) - assert_allclose(gmm.bic(X), bic_xp, rtol=float32_rtol) + # comparing Python float so need explicit rtol when X has dtype float32 + assert_allclose(gmm.score(X), score_xp, rtol=default_rtol) + assert_allclose(gmm.aic(X), aic_xp, rtol=default_rtol) + assert_allclose(gmm.bic(X), bic_xp, rtol=default_rtol) sample_X, sample_y = gmm.sample(10) - # generated samples are float64 so need explicit rtol for dtype=float32 - assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=float32_rtol) + # generated samples are float64 so need explicit rtol when X has dtype float32 + assert_allclose(sample_X, _convert_to_numpy(sample_X_xp, xp=xp), rtol=default_rtol) assert_allclose(sample_y, _convert_to_numpy(sample_y_xp, xp=xp)) - # Check fitted attributes - assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) - assert_allclose( - gmm.covariances_, - _convert_to_numpy(gmm_xp.covariances_, xp=xp), - rtol=increased_rtol, - ) - - -@pytest.mark.parametrize( - "array_namespace, device_, dtype", - yield_namespace_device_dtype_combinations(), - ids=_get_namespace_device_dtype_ids, -) -def test_gaussian_mixture_array_api_compliance_with_array_like_constructor_parameters( - array_namespace, device_, dtype, global_random_seed -): - """Check that array api works with array-like constructors: 'means_init', - 'precisions_init' and 'weights_init' - """ - n_features = 2 - n_components = 3 - X, _ = make_blobs( - n_samples=int(1e3), - n_features=n_features, - centers=3, - random_state=global_random_seed, - ) - X = X.astype(dtype) - - xp = _array_api_for_tests(array_namespace, device_) - X = xp.asarray(X, device=device_) - - means_init = xp.zeros((n_components, n_features), device=device_, dtype=X.dtype) - precisions_init = xp.ones((n_components, n_features), device=device_, dtype=X.dtype) - gmm = GaussianMixture( - n_components=3, - covariance_type="diag", - random_state=global_random_seed, - init_params="random", - means_init=means_init, - precisions_init=precisions_init, - weights_init=xp.asarray([0.1, 0.4, 0.5]), - ) 
- - with sklearn.config_context(array_api_dispatch=True): - gmm.fit(X) - - assert device(X) == device(gmm.weights_) - @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("init_params", ["kmeans", "k-means++"]) From 4f89101a5dd2468dd1bdc8fd8f5cb031c2b6cd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 18 Jun 2025 15:35:28 +0200 Subject: [PATCH 90/92] [azure parallel] Remove debug --- sklearn/mixture/tests/test_gaussian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index f03e86aa547dc..42f62e59c1192 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1533,9 +1533,6 @@ def test_gaussian_mixture_array_api_compliance( arg_xp = xp.asarray(param_value, device=device_) setattr(gmm_xp, param_name, arg_xp) - print(gmm.means_init) - print(gmm.precisions_init) - print(gmm.weights_init) gmm_xp.fit(X_xp) assert get_namespace(gmm_xp.means_)[0] == xp From d2ca2097dc7056eab8ccc27550096e4c8d1a5c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 19 Jun 2025 11:14:54 +0200 Subject: [PATCH 91/92] Test more attributes --- sklearn/mixture/tests/test_gaussian_mixture.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 42f62e59c1192..19510b4993329 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1569,12 +1569,19 @@ def test_gaussian_mixture_array_api_compliance( # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp)) + assert_allclose(gmm.weights_, _convert_to_numpy(gmm_xp.weights_, xp=xp)) assert_allclose( gmm.covariances_, _convert_to_numpy(gmm_xp.covariances_, xp=xp), atol=increased_atol, rtol=increased_rtol, ) + assert_allclose( + gmm.precisions_cholesky_, + _convert_to_numpy(gmm_xp.precisions_cholesky_, xp=xp), + atol=increased_atol, + rtol=increased_rtol, + ) assert_allclose( gmm.precisions_, _convert_to_numpy(gmm_xp.precisions_, xp=xp), From d46840bdf6c3b42342cfb99efef51936c94576f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 19 Jun 2025 11:15:09 +0200 Subject: [PATCH 92/92] Increase tol to make tests pass --- sklearn/mixture/tests/test_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 19510b4993329..794a4dfc070ce 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1564,8 +1564,8 @@ def test_gaussian_mixture_array_api_compliance( # Define specific rtol to make tests pass default_rtol = 1e-4 if dtype == "float32" else 1e-7 - increased_atol = 1e-4 if dtype == "float32" else 0 - increased_rtol = 5e-4 if dtype == "float32" else 1e-7 + increased_atol = 5e-4 if dtype == "float32" else 0 + increased_rtol = 1e-3 if dtype == "float32" else 1e-7 # Check fitted attributes assert_allclose(gmm.means_, _convert_to_numpy(gmm_xp.means_, xp=xp))
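The dtype-dependent tolerance pattern used above, as a standalone sketch assuming only NumPy: choose rtol from the input dtype before comparing the NumPy fit against the array API fit.

import numpy as np
from numpy.testing import assert_allclose

dtype = "float32"
rtol = 1e-3 if dtype == "float32" else 1e-7  # mirrors increased_rtol above
reference = np.asarray([1.0, 2.0, 3.0], dtype=dtype)
other = reference * (1 + 5e-4)  # a float32-scale discrepancy between backends
assert_allclose(reference, other, rtol=rtol)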