From b17d3752b8faa9c6a8fc62630df404fc4fcd32c9 Mon Sep 17 00:00:00 2001 From: Jeremy Steward Date: Fri, 3 Apr 2015 16:46:02 -0600 Subject: [PATCH 1/5] Fixes issue where 1d arrays improperly reshape --- sklearn/covariance/robust_covariance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index afacb1fa4a690..57efa3f45d2f4 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -361,7 +361,7 @@ def fast_mcd(X, support_fraction=None, X = np.asarray(X) if X.ndim == 1: - X = np.reshape(X, (1, -1)) + X = np.reshape(X, (-1, 1)) warnings.warn("Only one sample available. " "You may want to reshape your data array") n_samples, n_features = X.shape From 71d016f96ead2b5b3b8c8be31dc75cb0c206a32a Mon Sep 17 00:00:00 2001 From: Jeremy Steward Date: Fri, 3 Apr 2015 17:07:49 -0600 Subject: [PATCH 2/5] Changes warning message if 1Darray passed to fast_mcd --- sklearn/covariance/robust_covariance.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 57efa3f45d2f4..0e975f105e3b3 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -362,8 +362,9 @@ def fast_mcd(X, support_fraction=None, X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (-1, 1)) - warnings.warn("Only one sample available. " - "You may want to reshape your data array") + warnings.warn("1D array passed in. " + "Assuming the array contains samples, not features. " + "You may wish to reshape your data.") n_samples, n_features = X.shape # minimum breakdown value From 8ec134f27762648d5dafd2cd4f3f0b84ee513f28 Mon Sep 17 00:00:00 2001 From: Jeremy Steward Date: Sun, 5 Apr 2015 13:35:51 -0600 Subject: [PATCH 3/5] temp change to singular covariance --- sklearn/covariance/robust_covariance.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 0e975f105e3b3..0975122d32d3a 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -142,12 +142,13 @@ def _c_step(X, n_support, random_state, remaining_iterations=30, dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1) # Catch computation errors if np.isinf(det): - raise ValueError( - "Singular covariance matrix. " - "Please check that the covariance matrix corresponding " - "to the dataset is full rank and that MinCovDet is used with " - "Gaussian-distributed data (or at least data drawn from a " - "unimodal, symmetric distribution.") + return location, covariance, det, support, dist + # raise ValueError( + # "Singular covariance matrix. " + # "Please check that the covariance matrix corresponding " + # "to the dataset is full rank and that MinCovDet is used with " + # "Gaussian-distributed data (or at least data drawn from a " + # "unimodal, symmetric distribution.") # Check convergence if np.allclose(det, previous_det): # c_step procedure converged From b124b1c69043448526ca5207296941f9bb2b8aa6 Mon Sep 17 00:00:00 2001 From: Jeremy Steward Date: Sun, 5 Apr 2015 14:00:12 -0600 Subject: [PATCH 4/5] Adds break condition to determinant check in _c_step --- sklearn/covariance/robust_covariance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 0975122d32d3a..929d8e7ff8103 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -118,7 +118,7 @@ def _c_step(X, n_support, random_state, remaining_iterations=30, # Iterative procedure for Minimum Covariance Determinant computation det = fast_logdet(covariance) previous_det = np.inf - while (det < previous_det) and (remaining_iterations > 0): + while (det < previous_det) and (remaining_iterations > 0) and (det != -np.inf): # save old estimates values previous_location = location previous_covariance = covariance @@ -142,7 +142,7 @@ def _c_step(X, n_support, random_state, remaining_iterations=30, dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1) # Catch computation errors if np.isinf(det): - return location, covariance, det, support, dist + results = location, covariance, det, support, dist # raise ValueError( # "Singular covariance matrix. " # "Please check that the covariance matrix corresponding " From 2226a65c75c0f2af4bf4a2f8dc5ec4695a19060c Mon Sep 17 00:00:00 2001 From: Jeremy Steward Date: Mon, 6 Apr 2015 08:23:18 -0600 Subject: [PATCH 5/5] Fixes issue with singular matrices --- sklearn/covariance/robust_covariance.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 929d8e7ff8103..1d80b34fc4ce1 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -118,7 +118,7 @@ def _c_step(X, n_support, random_state, remaining_iterations=30, # Iterative procedure for Minimum Covariance Determinant computation det = fast_logdet(covariance) previous_det = np.inf - while (det < previous_det) and (remaining_iterations > 0) and (det != -np.inf): + while (det < previous_det) and (remaining_iterations > 0) and not (np.isinf(det)): # save old estimates values previous_location = location previous_covariance = covariance @@ -140,15 +140,9 @@ def _c_step(X, n_support, random_state, remaining_iterations=30, previous_dist = dist dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1) - # Catch computation errors + # Check if best fit already found (det => 0, logdet => -inf) if np.isinf(det): results = location, covariance, det, support, dist - # raise ValueError( - # "Singular covariance matrix. " - # "Please check that the covariance matrix corresponding " - # "to the dataset is full rank and that MinCovDet is used with " - # "Gaussian-distributed data (or at least data drawn from a " - # "unimodal, symmetric distribution.") # Check convergence if np.allclose(det, previous_det): # c_step procedure converged