
Use array_api in Gaussian Mixture Models #99


Status: Open. Wants to merge 15 commits into base: main.
5 changes: 4 additions & 1 deletion doc/whats_new/v1.1.rst
@@ -148,7 +148,7 @@ Changelog
:user:`Sebastian Pujalte <pujaltes>`.

- |Enhancement| :func:`datasets.make_blobs` no longer copies data during the generation
process, therefore uses less memory.
process, therefore uses less memory.
:pr:`22412` by :user:`Zhehao Liu <MaxwellLZH>`.

- |Enhancement| :func:`datasets.load_diabetes` now accepts the parameter
@@ -491,6 +491,9 @@ Changelog
:mod:`sklearn.mixture`
......................

- |Enhancement| Added Array API support for :class:`mixture.GaussianMixture`
for `init_params="random"` and `covariance_type="full"`. :pr:`xxxxx` by `Thomas Fan`_.

- |Fix| Fixed a bug so that `precisions_cholesky_` is correctly initialized in
:class:`mixture.GaussianMixture` when `precisions_init` is provided, by taking
its square root.
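
A rough end-to-end usage sketch for the entry above (not part of the diff). It assumes NumPy >= 1.22 so the experimental `numpy.array_api` namespace is importable, and that input validation accepts such arrays as exercised by this PR:

import numpy as np
import numpy.array_api as xp  # assumption: experimental Array API reference namespace

from sklearn import config_context
from sklearn.mixture import GaussianMixture

X_np = np.random.RandomState(0).standard_normal(size=(200, 3))
X_xp = xp.asarray(X_np)

with config_context(array_api_dispatch=True):
    # Only init_params="random" with covariance_type="full" is covered by this PR.
    gmm = GaussianMixture(
        n_components=2, init_params="random", covariance_type="full", random_state=0
    )
    labels = gmm.fit_predict(X_xp)
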
27 changes: 25 additions & 2 deletions sklearn/_config.py
@@ -9,6 +9,7 @@
"working_memory": int(os.environ.get("SKLEARN_WORKING_MEMORY", 1024)),
"print_changed_only": True,
"display": "text",
"array_api_dispatch": False,
}
_threadlocal = threading.local()

@@ -40,7 +41,11 @@ def get_config():


def set_config(
assume_finite=None, working_memory=None, print_changed_only=None, display=None
assume_finite=None,
working_memory=None,
print_changed_only=None,
display=None,
array_api_dispatch=None,
):
"""Set global scikit-learn configuration

@@ -80,6 +85,11 @@

.. versionadded:: 0.23

array_api_dispatch : bool, default=None
Configure scikit-learn to use the Array API standard when inputs support it. Global default: False

.. versionadded:: 1.2

See Also
--------
config_context : Context manager for global scikit-learn configuration.
@@ -95,11 +105,18 @@
local_config["print_changed_only"] = print_changed_only
if display is not None:
local_config["display"] = display
if array_api_dispatch is not None:
local_config["array_api_dispatch"] = array_api_dispatch


@contextmanager
def config_context(
*, assume_finite=None, working_memory=None, print_changed_only=None, display=None
*,
assume_finite=None,
working_memory=None,
print_changed_only=None,
display=None,
array_api_dispatch=None,
):
"""Context manager for global scikit-learn configuration.

@@ -138,6 +155,11 @@ def config_context(

.. versionadded:: 0.23

array_api_dispatch : bool, default=None
Configure scikit-learn to use the Array API standard when inputs support it. Global default: False

.. versionadded:: 1.2

Yields
------
None.
@@ -171,6 +193,7 @@ def config_context(
working_memory=working_memory,
print_changed_only=print_changed_only,
display=display,
array_api_dispatch=array_api_dispatch,
)

try:
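
For reference, a small sketch (not part of the diff) of how the new flag behaves with the code above: it lives in the per-thread configuration dict, `set_config` changes it globally, and `config_context` restores the previous value on exit:

import sklearn

print(sklearn.get_config()["array_api_dispatch"])  # False, the new global default

sklearn.set_config(array_api_dispatch=True)
print(sklearn.get_config()["array_api_dispatch"])  # True

with sklearn.config_context(array_api_dispatch=False):
    print(sklearn.get_config()["array_api_dispatch"])  # False inside the context
print(sklearn.get_config()["array_api_dispatch"])  # True again after the context exits

sklearn.set_config(array_api_dispatch=False)  # back to the default
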
23 changes: 14 additions & 9 deletions sklearn/mixture/_base.py
@@ -9,13 +9,13 @@
from time import time

import numpy as np
from scipy.special import logsumexp

from .. import cluster
from ..base import BaseEstimator
from ..base import DensityMixin
from ..exceptions import ConvergenceWarning
from ..utils import check_random_state
from ..utils._array_api import get_namespace, logsumexp
from ..utils.validation import check_is_fitted


@@ -148,8 +148,10 @@ def _initialize_parameters(self, X, random_state):
)
resp[np.arange(n_samples), label] = 1
elif self.init_params == "random":
xp, _ = get_namespace(X)
resp = random_state.uniform(size=(n_samples, self.n_components))
resp /= resp.sum(axis=1)[:, np.newaxis]
resp = xp.asarray(resp)
resp /= xp.reshape(xp.sum(resp, axis=1), (-1, 1))
else:
raise ValueError(
"Unimplemented initialization method '%s'" % self.init_params
@@ -225,7 +227,8 @@ def fit_predict(self, X, y=None):
labels : array, shape (n_samples,)
Component labels.
"""
X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2)
xp, _ = get_namespace(X)
X = self._validate_data(X, dtype=[xp.float64, xp.float32], ensure_min_samples=2)
if X.shape[0] < self.n_components:
raise ValueError(
"Expected n_samples >= n_components "
@@ -238,7 +241,7 @@ def fit_predict(self, X, y=None):
do_init = not (self.warm_start and hasattr(self, "converged_"))
n_init = self.n_init if do_init else 1

max_lower_bound = -np.inf
max_lower_bound = -xp.inf
self.converged_ = False

random_state = check_random_state(self.random_state)
@@ -250,7 +253,7 @@ def fit_predict(self, X, y=None):
if do_init:
self._initialize_parameters(X, random_state)

lower_bound = -np.inf if do_init else self.lower_bound_
lower_bound = -xp.inf if do_init else self.lower_bound_

for n_iter in range(1, self.max_iter + 1):
prev_lower_bound = lower_bound
@@ -268,7 +271,7 @@ def fit_predict(self, X, y=None):

self._print_verbose_msg_init_end(lower_bound)

if lower_bound > max_lower_bound or max_lower_bound == -np.inf:
if lower_bound > max_lower_bound or max_lower_bound == -xp.inf:
max_lower_bound = lower_bound
best_params = self._get_parameters()
best_n_iter = n_iter
@@ -291,7 +294,7 @@ def fit_predict(self, X, y=None):
# for any value of max_iter and tol (and any random_state).
_, log_resp = self._e_step(X)

return log_resp.argmax(axis=1)
return xp.argmax(log_resp, axis=1)

def _e_step(self, X):
"""E step.
@@ -309,8 +312,9 @@ def _e_step(self, X):
Logarithm of the posterior probabilities (or responsibilities) of
the point of each sample in X.
"""
xp, _ = get_namespace(X)
log_prob_norm, log_resp = self._estimate_log_prob_resp(X)
return np.mean(log_prob_norm), log_resp
return xp.mean(log_prob_norm), log_resp

@abstractmethod
def _m_step(self, X, log_resp):
@@ -529,11 +533,12 @@ def _estimate_log_prob_resp(self, X):
log_responsibilities : array, shape (n_samples, n_components)
logarithm of the responsibilities
"""
xp, _ = get_namespace(X)
weighted_log_prob = self._estimate_weighted_log_prob(X)
log_prob_norm = logsumexp(weighted_log_prob, axis=1)
with np.errstate(under="ignore"):
# ignore underflow
log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]
log_resp = weighted_log_prob - xp.reshape(log_prob_norm, (-1, 1))
return log_prob_norm, log_resp

def _print_verbose_msg_init_beg(self, n_init):
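
For context, `_estimate_log_prob_resp` normalizes the weighted log probabilities per sample; the `logsumexp` imported from `utils._array_api` is assumed to behave like `scipy.special.logsumexp`. A small NumPy/SciPy illustration of the same computation (not part of the diff):

import numpy as np
from scipy.special import logsumexp

# toy weighted log probabilities, shape (n_samples, n_components)
weighted_log_prob = np.log(np.array([[0.2, 0.6], [0.5, 0.5]]))
log_prob_norm = logsumexp(weighted_log_prob, axis=1)             # log sum_k exp(...)
log_resp = weighted_log_prob - np.reshape(log_prob_norm, (-1, 1))

# exponentiated rows sum to 1: these are the posterior responsibilities
assert np.allclose(np.exp(log_resp).sum(axis=1), 1.0)
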
89 changes: 56 additions & 33 deletions sklearn/mixture/_gaussian_mixture.py
@@ -5,12 +5,16 @@
# License: BSD 3 clause

import numpy as np
from math import log
from functools import partial

from scipy import linalg
import scipy

from ._base import BaseMixture, _check_shape
from ..utils import check_array
from ..utils.extmath import row_norms
from ..utils._array_api import get_namespace


###############################################################################
@@ -171,12 +175,17 @@ def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar):
covariances : array, shape (n_components, n_features, n_features)
The covariance matrix of the current components.
"""
xp, is_array_api = get_namespace(resp, X, nk)
n_components, n_features = means.shape
covariances = np.empty((n_components, n_features, n_features))
covariances = xp.empty((n_components, n_features, n_features))
for k in range(n_components):
diff = X - means[k]
covariances[k] = np.dot(resp[:, k] * diff.T, diff) / nk[k]
covariances[k].flat[:: n_features + 1] += reg_covar
diff = X - means[k, :]
covariances[k, :, :] = ((resp[:, k] * diff.T) @ diff) / nk[k]
if is_array_api:
for i in range(n_features):
covariances[k, i, i] += reg_covar
else:
covariances[k].flat[:: n_features + 1] += reg_covar
return covariances
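
The explicit `covariances[k, i, i] += reg_covar` loop on the Array API branch is meant to match the NumPy `.flat[:: n_features + 1]` diagonal trick it replaces; a short equivalence check (illustration, not part of the diff):

import numpy as np

n_features, reg_covar = 3, 1e-6
cov = np.array([[2.0, 0.3, 0.1], [0.3, 1.5, 0.2], [0.1, 0.2, 1.0]])

a = cov.copy()
a.flat[:: n_features + 1] += reg_covar   # strided view over the diagonal

b = cov.copy()
for i in range(n_features):              # element-wise updates, valid for Array API arrays
    b[i, i] += reg_covar

assert np.allclose(a, b)
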


@@ -286,8 +295,9 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type):
The covariance matrix of the current components.
The shape depends of the covariance_type.
"""
nk = resp.sum(axis=0) + 10 * np.finfo(resp.dtype).eps
means = np.dot(resp.T, X) / nk[:, np.newaxis]
xp, _ = get_namespace(X, resp)
nk = xp.sum(resp, axis=0) + 10 * xp.finfo(resp.dtype).eps
means = resp.T @ X / xp.reshape(nk, (-1, 1))
covariances = {
"full": _estimate_gaussian_covariances_full,
"tied": _estimate_gaussian_covariances_tied,
@@ -321,27 +331,34 @@ def _compute_precision_cholesky(covariances, covariance_type):
"or collapsed samples). Try to decrease the number of components, "
"or increase reg_covar."
)
xp, is_array_api = get_namespace(covariances)
if is_array_api:
cholesky = xp.linalg.cholesky
solve = xp.linalg.solve
else:
cholesky = partial(scipy.linalg.cholesky, lower=True)
solve = partial(scipy.linalg.solve_triangular, lower=True)

if covariance_type == "full":
n_components, n_features, _ = covariances.shape
precisions_chol = np.empty((n_components, n_features, n_features))
for k, covariance in enumerate(covariances):
precisions_chol = xp.empty((n_components, n_features, n_features))
for k in range(n_components):
try:
cov_chol = linalg.cholesky(covariance, lower=True)
cov_chol = cholesky(covariances[k, :, :])
except linalg.LinAlgError:
raise ValueError(estimate_precision_error_message)
precisions_chol[k] = linalg.solve_triangular(
cov_chol, np.eye(n_features), lower=True
).T
precisions_chol[k, :, :] = solve(cov_chol, xp.eye(n_features)).T

if is_array_api:
precisions_chol[k, :, :] = xp.triu(precisions_chol[k, :, :])

elif covariance_type == "tied":
_, n_features = covariances.shape
try:
cov_chol = linalg.cholesky(covariances, lower=True)
cov_chol = cholesky(covariances)
except linalg.LinAlgError:
raise ValueError(estimate_precision_error_message)
precisions_chol = linalg.solve_triangular(
cov_chol, np.eye(n_features), lower=True
).T
precisions_chol = linalg.solve(cov_chol, np.eye(n_features)).T
else:
if np.any(np.less_equal(covariances, 0.0)):
raise ValueError(estimate_precision_error_message)
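
As background for this function: each `precisions_chol[k]` is the transposed inverse of the lower Cholesky factor of the component covariance, so `precisions_chol[k] @ precisions_chol[k].T` recovers the precision matrix. A NumPy sanity check (illustration, not part of the diff):

import numpy as np

rng = np.random.RandomState(0)
A = rng.standard_normal(size=(4, 4))
cov = A @ A.T + 4 * np.eye(4)                      # a well-conditioned covariance

cov_chol = np.linalg.cholesky(cov)                 # lower triangular factor
prec_chol = np.linalg.solve(cov_chol, np.eye(4)).T

assert np.allclose(prec_chol @ prec_chol.T, np.linalg.inv(cov))
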
@@ -373,20 +390,20 @@ def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features):
log_det_precision_chol : array-like of shape (n_components,)
The determinant of the precision matrix for each component.
"""
xp, _ = get_namespace(matrix_chol)
if covariance_type == "full":
n_components, _, _ = matrix_chol.shape
log_det_chol = np.sum(
np.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), 1
)
matrix_chol_reshape = xp.reshape(matrix_chol, (n_components, -1))
log_det_chol = xp.sum(xp.log(matrix_chol_reshape[:, :: n_features + 1]), axis=1)

elif covariance_type == "tied":
log_det_chol = np.sum(np.log(np.diag(matrix_chol)))
log_det_chol = xp.sum(xp.log(xp.diag(matrix_chol)))

elif covariance_type == "diag":
log_det_chol = np.sum(np.log(matrix_chol), axis=1)
log_det_chol = xp.sum(xp.log(matrix_chol), axis=1)

else:
log_det_chol = n_features * (np.log(matrix_chol))
log_det_chol = n_features * (xp.log(matrix_chol))

return log_det_chol
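
In the "full" branch above, reshaping to `(n_components, -1)` and slicing with `[:, :: n_features + 1]` picks out each component's diagonal, so the summed logs equal the log-determinant of each triangular Cholesky factor. A NumPy illustration (not part of the diff):

import numpy as np

n_components, n_features = 2, 3
rng = np.random.RandomState(0)
matrix_chol = np.tril(rng.uniform(0.5, 1.5, size=(n_components, n_features, n_features)))

reshaped = np.reshape(matrix_chol, (n_components, -1))
log_det = np.sum(np.log(reshaped[:, :: n_features + 1]), axis=1)

# determinant of a triangular matrix is the product of its diagonal entries
assert np.allclose(log_det, np.log(np.linalg.det(matrix_chol)))
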

@@ -413,6 +430,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):
-------
log_prob : array, shape (n_samples, n_components)
"""
xp, _ = get_namespace(X, means, precisions_chol)
n_samples, n_features = X.shape
n_components, _ = means.shape
# The determinant of the precision matrix from the Cholesky decomposition
@@ -422,10 +440,12 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):
log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features)

if covariance_type == "full":
log_prob = np.empty((n_samples, n_components))
for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)
log_prob[:, k] = np.sum(np.square(y), axis=1)
log_prob = xp.empty((n_samples, n_components))
for k in range(n_components):
mu = means[k, :]
prec_chol = precisions_chol[k, :, :]
y = X @ prec_chol - mu @ prec_chol
log_prob[:, k] = xp.sum(xp.square(y), axis=1)

elif covariance_type == "tied":
log_prob = np.empty((n_samples, n_components))
@@ -450,7 +470,7 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):
)
# Since we are using the precision of the Cholesky decomposition,
# `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol`
return -0.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det
return -0.5 * (n_features * log(2 * xp.pi) + log_prob) + log_det
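
The returned expression is the usual multivariate normal log-density written with the precision Cholesky factor (the comment above explains the sign of the determinant term). A single-component consistency check against SciPy (illustration, not part of the diff):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
X = rng.standard_normal(size=(5, 3))
mu = rng.standard_normal(size=3)
A = rng.standard_normal(size=(3, 3))
cov = A @ A.T + 3 * np.eye(3)

cov_chol = np.linalg.cholesky(cov)
prec_chol = np.linalg.solve(cov_chol, np.eye(3)).T

y = X @ prec_chol - mu @ prec_chol
log_prob = np.sum(np.square(y), axis=1)          # squared Mahalanobis distances
log_det = np.sum(np.log(np.diag(prec_chol)))     # log|precision Cholesky factor|
log_dens = -0.5 * (3 * np.log(2 * np.pi) + log_prob) + log_det

assert np.allclose(log_dens, multivariate_normal(mean=mu, cov=cov).logpdf(X))
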


class GaussianMixture(BaseMixture):
@@ -742,8 +762,9 @@ def _m_step(self, X, log_resp):
the point of each sample in X.
"""
n_samples, _ = X.shape
xp, _ = get_namespace(X, log_resp)
self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters(
X, np.exp(log_resp), self.reg_covar, self.covariance_type
X, xp.exp(log_resp), self.reg_covar, self.covariance_type
)
self.weights_ /= n_samples
self.precisions_cholesky_ = _compute_precision_cholesky(
@@ -756,7 +777,8 @@ def _estimate_log_prob(self, X):
)

def _estimate_log_weights(self):
return np.log(self.weights_)
xp, _ = get_namespace(self.weights_)
return xp.log(self.weights_)

def _compute_lower_bound(self, _, log_prob_norm):
return log_prob_norm
@@ -779,11 +801,12 @@ def _set_parameters(self, params):

# Attributes computation
_, n_features = self.means_.shape

if self.covariance_type == "full":
self.precisions_ = np.empty(self.precisions_cholesky_.shape)
for k, prec_chol in enumerate(self.precisions_cholesky_):
self.precisions_[k] = np.dot(prec_chol, prec_chol.T)
prec_chol = self.precisions_cholesky_
xp, _ = get_namespace(prec_chol)
self.precisions_ = xp.empty(prec_chol.shape)
for k in range(prec_chol.shape[0]):
self.precisions_[k, :, :] = prec_chol[k, :, :] @ prec_chol[k, :, :].T

elif self.covariance_type == "tied":
self.precisions_ = np.dot(