From 8db148a0b79ab6e9020fccbd703d508fcfafa716 Mon Sep 17 00:00:00 2001 From: Christopher Yeh Date: Tue, 16 Mar 2021 01:23:14 -0600 Subject: [PATCH 1/4] Improve documentation consistency for GaussianProcessRegressor --- sklearn/gaussian_process/_gpr.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index b4ab0441efc71..e5f44c7d3a3d2 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -30,9 +30,9 @@ class GaussianProcessRegressor(MultiOutputMixin, GaussianProcessRegressor: * allows prediction without prior fitting (based on the GP prior) - * provides an additional method sample_y(X), which evaluates samples + * provides an additional method `sample_y(X)`, which evaluates samples drawn from the GPR (prior or posterior) at given inputs - * exposes a method log_marginal_likelihood(theta), which can be used + * exposes a method `log_marginal_likelihood(theta)`, which can be used externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. @@ -68,8 +68,8 @@ class GaussianProcessRegressor(MultiOutputMixin, must have the signature:: def optimizer(obj_func, initial_theta, bounds): - # * 'obj_func' is the objective function to be minimized, which - # takes the hyperparameters theta as parameter and an + # * 'obj_func': the objective function to be minimized, which + # takes the hyperparameters theta as a parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be @@ -80,7 +80,7 @@ def optimizer(obj_func, initial_theta, bounds): # the corresponding value of the target function. return theta_opt, func_min - Per default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize + Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize is used. 
If None is passed, the kernel's parameters are kept fixed. Available internal optimizers are:: @@ -113,7 +113,7 @@ def optimizer(obj_func, initial_theta, bounds): random_state : int, RandomState instance or None, default=None Determines random number generation used to initialize the centers. Pass an int for reproducible results across multiple function calls. - See :term: `Glossary <random_state>`. + See :term:`Glossary <random_state>`. Attributes ---------- @@ -302,7 +302,7 @@ def predict(self, X, return_std=False, return_cov=False): Returns ------- - y_mean : ndarray of shape (n_samples, [n_output_dims]) + y_mean : ndarray of shape (n_samples, [n_targets]) Mean of predictive distribution a query points. y_std : ndarray of shape (n_samples,), optional @@ -399,11 +399,11 @@ def sample_y(self, X, n_samples=1, random_state=0): Determines random number generation to randomly draw samples. Pass an int for reproducible results across multiple function calls. - See :term: `Glossary <random_state>`. + See :term:`Glossary <random_state>`. Returns ------- - y_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples) + y_samples : ndarray of shape (n_samples, [n_targets], n_samples) Values of n_samples samples drawn from Gaussian process and evaluated at query points. """ From 44c834f77f705c9e29f611aa79143fec541bb948 Mon Sep 17 00:00:00 2001 From: Christopher Yeh Date: Mon, 22 Mar 2021 15:45:07 -0600 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Thomas J. 
Fan --- sklearn/gaussian_process/_gpr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index e5f44c7d3a3d2..0f2021eb26690 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -302,7 +302,7 @@ def predict(self, X, return_std=False, return_cov=False): Returns ------- - y_mean : ndarray of shape (n_samples, [n_targets]) + y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets) Mean of predictive distribution a query points. y_std : ndarray of shape (n_samples,), optional @@ -403,7 +403,8 @@ def sample_y(self, X, n_samples=1, random_state=0): Returns ------- - y_samples : ndarray of shape (n_samples, [n_targets], n_samples) + y_samples : ndarray of shape (n_samples, n_samples), or \ + (n_samples, n_targets, n_samples) Values of n_samples samples drawn from Gaussian process and evaluated at query points. """ From 68b04ec22e8acf447a8c1223554abdd6f14c57e3 Mon Sep 17 00:00:00 2001 From: Christopher Yeh Date: Tue, 23 Mar 2021 14:52:00 -0600 Subject: [PATCH 3/4] More code / documentation improvements for clarity in GPR --- sklearn/gaussian_process/_gpr.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 0f2021eb26690..8a80761cf3841 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -211,8 +211,8 @@ def fit(self, X, y): if self.alpha.shape[0] == 1: self.alpha = self.alpha[0] else: - raise ValueError("alpha must be a scalar or an array" - " with same number of entries as y.(%d != %d)" + raise ValueError("alpha must be a scalar or an array " + "with same number of entries as y. 
(%d != %d)" % (self.alpha.shape[0], y.shape[0])) self.X_train_ = np.copy(X) if self.copy_X_train else X @@ -283,9 +283,9 @@ def predict(self, X, return_std=False, return_cov=False): """Predict using the Gaussian process regression model We can also predict based on an unfitted model by using the GP prior. - In addition to the mean of the predictive distribution, also its - standard deviation (return_std=True) or covariance (return_cov=True). - Note that at most one of the two can be requested. + In addition to the mean of the predictive distribution, optionally also + returns its standard deviation (`return_std=True`) or covariance + (`return_cov=True`). Note that at most one of the two can be requested. Parameters ---------- @@ -315,8 +315,7 @@ def predict(self, X, return_std=False, return_cov=False): """ if return_std and return_cov: raise RuntimeError( - "Not returning standard deviation of predictions when " - "returning full covariance.") + "At most one of return_std or return_cov can be requested.") if self.kernel is None or self.kernel.requires_vector_input: X = self._validate_data(X, ensure_2d=True, dtype="numeric", @@ -342,14 +341,14 @@ def predict(self, X, return_std=False, return_cov=False): return y_mean else: # Predict based on GP posterior K_trans = self.kernel_(X, self.X_train_) - y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star) + y_mean = K_trans @ self.alpha_ # Line 4 (y_mean = f_star) # undo normalisation y_mean = self._y_train_std * y_mean + self._y_train_mean if return_cov: v = cho_solve((self.L_, True), K_trans.T) # Line 5 - y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 + y_cov = self.kernel_(X) - K_trans @ v # Line 6 # undo normalisation y_cov = y_cov * self._y_train_std**2 @@ -362,12 +361,12 @@ def predict(self, X, return_std=False, return_cov=False): # decomposition L and its inverse L_inv L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0])) - self._K_inv = L_inv.dot(L_inv.T) + self._K_inv = L_inv @ L_inv.T # Compute 
variance of predictive distribution y_var = self.kernel_.diag(X) y_var -= np.einsum("ij,ij->i", - np.dot(K_trans, self._K_inv), K_trans) + K_trans @ self._K_inv, K_trans) # Check if any of the variances is negative because of # numerical issues. If yes: set the variance to 0. @@ -389,11 +388,11 @@ def sample_y(self, X, n_samples=1, random_state=0): Parameters ---------- - X : array-like of shape (n_samples, n_features) or list of object + X : array-like of shape (n_samples_X, n_features) or list of object Query points where the GP is evaluated. n_samples : int, default=1 - The number of samples drawn from the Gaussian process + Number of samples drawn from the Gaussian process per query point random_state : int, RandomState instance or None, default=0 Determines random number generation to randomly draw samples. @@ -403,8 +402,8 @@ def sample_y(self, X, n_samples=1, random_state=0): Returns ------- - y_samples : ndarray of shape (n_samples, n_samples), or \ - (n_samples, n_targets, n_samples) + y_samples : ndarray of shape (n_samples_X, n_samples), or \ + (n_samples_X, n_targets, n_samples) Values of n_samples samples drawn from Gaussian process and evaluated at query points. 
""" From fe67c9e798657843ae228c9f6118e550a7fd5b99 Mon Sep 17 00:00:00 2001 From: Christopher Yeh Date: Tue, 23 Mar 2021 16:21:13 -0600 Subject: [PATCH 4/4] Undo np.dot -> @ --- sklearn/gaussian_process/_gpr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 8a80761cf3841..4e8814dd69951 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -341,14 +341,14 @@ def predict(self, X, return_std=False, return_cov=False): return y_mean else: # Predict based on GP posterior K_trans = self.kernel_(X, self.X_train_) - y_mean = K_trans @ self.alpha_ # Line 4 (y_mean = f_star) + y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star) # undo normalisation y_mean = self._y_train_std * y_mean + self._y_train_mean if return_cov: v = cho_solve((self.L_, True), K_trans.T) # Line 5 - y_cov = self.kernel_(X) - K_trans @ v # Line 6 + y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 # undo normalisation y_cov = y_cov * self._y_train_std**2 @@ -361,12 +361,12 @@ def predict(self, X, return_std=False, return_cov=False): # decomposition L and its inverse L_inv L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0])) - self._K_inv = L_inv @ L_inv.T + self._K_inv = L_inv.dot(L_inv.T) # Compute variance of predictive distribution y_var = self.kernel_.diag(X) y_var -= np.einsum("ij,ij->i", - K_trans @ self._K_inv, K_trans) + np.dot(K_trans, self._K_inv), K_trans) # Check if any of the variances is negative because of # numerical issues. If yes: set the variance to 0.