From 1efbd76bfe891bd502f990f7215a04bc4a551c88 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Fri, 20 Nov 2015 10:42:05 +0100 Subject: [PATCH 01/13] ENH: Show a ConvergenceWarning if max_iter iterations were performed. --- sklearn/semi_supervised/label_propagation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index c690ac1f151f4..f7b11383c3228 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -67,6 +67,7 @@ from ..utils.extmath import safe_sparse_dot from ..utils.multiclass import check_classification_targets from ..utils.validation import check_X_y, check_is_fitted, check_array +from ..exceptions import ConvergenceWarning # Helper functions @@ -287,6 +288,12 @@ def fit(self, X, y): alpha, self.label_distributions_) + y_static remaining_iter -= 1 + if remaining_iter <= 1: + warnings.warn( + 'max_iter=%d was reached without convergence.' % self.max_iter, + category=ConvergenceWarning + ) + normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer From 693e7bd25c1178782175c65913e2b166adee6f75 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Wed, 5 Jul 2017 00:23:17 +0200 Subject: [PATCH 02/13] DOC Indicate that max_iter is an integer. 
--- sklearn/semi_supervised/label_propagation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index f7b11383c3228..d4482fa483b6c 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -98,7 +98,7 @@ class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, alpha : float Clamping factor - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -331,7 +331,7 @@ class LabelPropagation(BaseLabelPropagation): This parameter will be removed in 0.21. 'alpha' is fixed to zero in 'LabelPropagation'. - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -448,7 +448,7 @@ class LabelSpreading(BaseLabelPropagation): alpha=0 means keeping the initial label information; alpha=1 means replacing all initial information. - max_iter : float + max_iter : integer maximum number of iterations allowed tol : float From 5ec86d62e263b01827cf67beda52595949273e1c Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Wed, 5 Jul 2017 00:24:06 +0200 Subject: [PATCH 03/13] FIX Change max_iter in Doctests to 1000. Otherwise, the Doctests resulted in ConvergenceWarning. --- sklearn/semi_supervised/label_propagation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index d4482fa483b6c..7d1ca6fd0b3f1 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -32,7 +32,7 @@ -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelPropagation ->>> label_prop_model = LabelPropagation() +>>> label_prop_model = LabelPropagation(max_iter=1000) >>> iris = datasets.load_iris() >>> random_unlabeled_points = np.where(np.random.randint(0, 2, ... 
size=len(iris.target))) @@ -363,7 +363,7 @@ class LabelPropagation(BaseLabelPropagation): -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelPropagation - >>> label_prop_model = LabelPropagation() + >>> label_prop_model = LabelPropagation(max_iter=1000) >>> iris = datasets.load_iris() >>> random_unlabeled_points = np.where(np.random.randint(0, 2, ... size=len(iris.target))) @@ -480,7 +480,7 @@ class LabelSpreading(BaseLabelPropagation): -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelSpreading - >>> label_prop_model = LabelSpreading() + >>> label_prop_model = LabelSpreading(max_iter=1000) >>> iris = datasets.load_iris() >>> random_unlabeled_points = np.where(np.random.randint(0, 2, ... size=len(iris.target))) From 8d7cdda2a69f5ef4cc4a595f7cda6c9cdef896fe Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Wed, 5 Jul 2017 08:57:21 +0200 Subject: [PATCH 04/13] Add test for ConvergenceWarning. --- sklearn/semi_supervised/tests/test_label_propagation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 3d5bd21a89110..1de3634261124 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -9,6 +9,7 @@ from sklearn.semi_supervised import label_propagation from sklearn.metrics.pairwise import rbf_kernel from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal @@ -145,3 +146,11 @@ def test_convergence_speed(): # this should converge quickly: assert mdl.n_iter_ < 10 assert_array_equal(mdl.predict(X), [0, 1, 1]) + + +def test_convergence_warning(): + # This is a non-regression test for #5774 + X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) + y = np.array([0, 1, 
-1]) + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5) + assert_warns(ConvergenceWarning, mdl.fit, X, y) From 8e313bd0d7b3891ab1fa12e6e5b42b22899fd861 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Fri, 21 Jul 2017 00:11:54 +0200 Subject: [PATCH 05/13] Change defaults for LabelPropagation and LabelSpreading Also add assert_no_warnings. --- sklearn/semi_supervised/label_propagation.py | 15 ++++++--------- .../tests/test_label_propagation.py | 3 ++- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 7d1ca6fd0b3f1..faecd71ca6bac 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -32,10 +32,9 @@ -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelPropagation ->>> label_prop_model = LabelPropagation(max_iter=1000) +>>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() ->>> random_unlabeled_points = np.where(np.random.randint(0, 2, -... size=len(iris.target))) +>>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -363,10 +362,9 @@ class LabelPropagation(BaseLabelPropagation): -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelPropagation - >>> label_prop_model = LabelPropagation(max_iter=1000) + >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... 
size=len(iris.target))) + >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -480,10 +478,9 @@ class LabelSpreading(BaseLabelPropagation): -------- >>> from sklearn import datasets >>> from sklearn.semi_supervised import LabelSpreading - >>> label_prop_model = LabelSpreading(max_iter=1000) + >>> label_prop_model = LabelSpreading() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... size=len(iris.target))) + >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 1de3634261124..330fa0698d82a 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -140,7 +140,8 @@ def test_convergence_speed(): # This is a non-regression test for #5774 X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) y = np.array([0, 1, -1]) - mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5000) + mdl = assert_no_warnings(label_propagation.LabelSpreading, kernel='rbf', + max_iter=5000) mdl.fit(X, y) # this should converge quickly: From 8dc12a182ee42329f42294b0729366aa8a9ba7d6 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Fri, 21 Jul 2017 16:55:47 +0200 Subject: [PATCH 06/13] Add seed for tests. 
--- sklearn/semi_supervised/label_propagation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index faecd71ca6bac..302b5bc0e6216 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -34,6 +34,7 @@ >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() +>>> np.random.seed(42) >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 @@ -364,6 +365,7 @@ class LabelPropagation(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() + >>> np.random.seed(42) >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 @@ -480,6 +482,7 @@ class LabelSpreading(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelSpreading >>> label_prop_model = LabelSpreading() >>> iris = datasets.load_iris() + >>> np.random.seed(42) >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 From 9e04e3e824b02a129deea13e99eefad77f722a9a Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Sat, 22 Jul 2017 10:57:15 +0200 Subject: [PATCH 07/13] Add assert_no_warnings to all tests which rely on convergence. 
--- .../semi_supervised/tests/test_label_propagation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 330fa0698d82a..41ef1f126ab74 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -39,7 +39,7 @@ def test_distribution(): samples = [[1., 0.], [0., 1.], [1., 1.]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = estimator(**parameters).fit(samples, labels) + clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) if parameters['kernel'] == 'knn': continue # unstable test; changes in k-NN ordering break it assert_array_almost_equal(clf.predict_proba([[1., 0.0]]), @@ -53,7 +53,7 @@ def test_predict(): samples = [[1., 0.], [0., 2.], [1., 3.]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = estimator(**parameters).fit(samples, labels) + clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1])) @@ -61,7 +61,7 @@ def test_predict_proba(): samples = [[1., 0.], [0., 1.], [1., 2.5]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = estimator(**parameters).fit(samples, labels) + clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) assert_array_almost_equal(clf.predict_proba([[1., 1.]]), np.array([[0.5, 0.5]])) @@ -94,7 +94,7 @@ def test_label_spreading_closed_form(): expected = np.dot(np.linalg.inv(np.eye(len(S)) - alpha * S), Y) expected /= expected.sum(axis=1)[:, np.newaxis] clf = label_propagation.LabelSpreading(max_iter=10000, alpha=alpha) - clf.fit(X, y) + assert_no_warnings(clf.fit, X, y) assert_array_almost_equal(expected, clf.label_distributions_, 4) @@ -109,7 +109,8 @@ def test_label_propagation_closed_form(): labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0] clf = 
label_propagation.LabelPropagation(max_iter=10000, - gamma=0.1).fit(X, y) + gamma=0.1) + assert_no_warnings(clf.fit, X, y) # adopting notation from Zhu et al 2002 T_bar = clf._build_graph() Tuu = T_bar[np.meshgrid(unlabelled_idx, unlabelled_idx, indexing='ij')] From 0b23b24c5cf19f4469fa3dac524e88d481cdd4b7 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Sun, 23 Jul 2017 15:41:35 +0200 Subject: [PATCH 08/13] Use a RandomState instead of setting seed globally --- sklearn/semi_supervised/label_propagation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 302b5bc0e6216..c5e344f15c54e 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -34,8 +34,8 @@ >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() ->>> np.random.seed(42) ->>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 +>>> rng = np.random.RandomState(42) +>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -365,8 +365,8 @@ class LabelPropagation(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() - >>> np.random.seed(42) - >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -482,8 +482,8 @@ class LabelSpreading(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelSpreading >>> label_prop_model = LabelSpreading() >>> iris = 
datasets.load_iris() - >>> np.random.seed(42) - >>> random_unlabeled_points = np.random.rand(len(iris.target)) < 0.3 + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) From 54a5f26b022eabce7622fb96ca6023243220c163 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Sun, 23 Jul 2017 15:44:55 +0200 Subject: [PATCH 09/13] Move assert_no_warnings to the dedicated test. --- .../tests/test_label_propagation.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 41ef1f126ab74..aa460a7a72ff1 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -39,7 +39,7 @@ def test_distribution(): samples = [[1., 0.], [0., 1.], [1., 1.]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) + clf = estimator(**parameters).fit(samples, labels) if parameters['kernel'] == 'knn': continue # unstable test; changes in k-NN ordering break it assert_array_almost_equal(clf.predict_proba([[1., 0.0]]), @@ -53,7 +53,7 @@ def test_predict(): samples = [[1., 0.], [0., 2.], [1., 3.]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) + clf = estimator(**parameters).fit(samples, labels) assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1])) @@ -61,7 +61,7 @@ def test_predict_proba(): samples = [[1., 0.], [0., 1.], [1., 2.5]] labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: - clf = assert_no_warnings(estimator(**parameters).fit, samples, labels) + clf = estimator(**parameters).fit(samples, labels) 
assert_array_almost_equal(clf.predict_proba([[1., 1.]]), np.array([[0.5, 0.5]])) @@ -71,7 +71,7 @@ def test_alpha_deprecation(): y[::3] = -1 lp_default = label_propagation.LabelPropagation(kernel='rbf', gamma=0.1) - lp_default_y = assert_no_warnings(lp_default.fit, X, y).transduction_ + lp_default_y = lp_default.fit(X, y).transduction_ lp_0 = label_propagation.LabelPropagation(alpha=0, kernel='rbf', gamma=0.1) lp_0_y = assert_warns(DeprecationWarning, lp_0.fit, X, y).transduction_ @@ -94,7 +94,7 @@ def test_label_spreading_closed_form(): expected = np.dot(np.linalg.inv(np.eye(len(S)) - alpha * S), Y) expected /= expected.sum(axis=1)[:, np.newaxis] clf = label_propagation.LabelSpreading(max_iter=10000, alpha=alpha) - assert_no_warnings(clf.fit, X, y) + clf.fit(X, y) assert_array_almost_equal(expected, clf.label_distributions_, 4) @@ -110,7 +110,7 @@ def test_label_propagation_closed_form(): clf = label_propagation.LabelPropagation(max_iter=10000, gamma=0.1) - assert_no_warnings(clf.fit, X, y) + clf.fit(X, y) # adopting notation from Zhu et al 2002 T_bar = clf._build_graph() Tuu = T_bar[np.meshgrid(unlabelled_idx, unlabelled_idx, indexing='ij')] @@ -141,8 +141,7 @@ def test_convergence_speed(): # This is a non-regression test for #5774 X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) y = np.array([0, 1, -1]) - mdl = assert_no_warnings(label_propagation.LabelSpreading, kernel='rbf', - max_iter=5000) + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5000) mdl.fit(X, y) # this should converge quickly: @@ -154,5 +153,14 @@ def test_convergence_warning(): # This is a non-regression test for #5774 X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) y = np.array([0, 1, -1]) - mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5) + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) + assert_warns(ConvergenceWarning, mdl.fit, X, 
y) + + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) From d0bf21a05936a3aabbaa1375c359af5c252c6468 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Fri, 28 Jul 2017 09:58:56 +0200 Subject: [PATCH 10/13] Fix bug with max_iter. --- sklearn/semi_supervised/label_propagation.py | 22 +++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index c5e344f15c54e..42ea6de02274b 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -70,13 +70,6 @@ from ..exceptions import ConvergenceWarning -# Helper functions - -def _not_converged(y_truth, y_prediction, tol=1e-3): - """basic convergence check""" - return np.abs(y_truth - y_prediction).sum() > tol - - class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): """Base class for label propagation module. 
@@ -265,12 +258,15 @@ def fit(self, X, y): l_previous = np.zeros((self.X_.shape[0], n_classes)) - remaining_iter = self.max_iter unlabeled = unlabeled[:, np.newaxis] if sparse.isspmatrix(graph_matrix): graph_matrix = graph_matrix.tocsr() - while (_not_converged(self.label_distributions_, l_previous, self.tol) - and remaining_iter > 1): + + self.n_iter_ = 0 + while self.n_iter_ < self.max_iter: + if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: + break + l_previous = self.label_distributions_ self.label_distributions_ = safe_sparse_dot( graph_matrix, self.label_distributions_) @@ -286,9 +282,8 @@ def fit(self, X, y): # clamp self.label_distributions_ = np.multiply( alpha, self.label_distributions_) + y_static - remaining_iter -= 1 - - if remaining_iter <= 1: + self.n_iter_ += 1 + else: warnings.warn( 'max_iter=%d was reached without convergence.' % self.max_iter, category=ConvergenceWarning @@ -301,7 +296,6 @@ def fit(self, X, y): transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)] self.transduction_ = transduction.ravel() - self.n_iter_ = self.max_iter - remaining_iter return self From cae57dff09f43d6b328c519a75dd9c560b49cc8c Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Fri, 28 Jul 2017 10:00:43 +0200 Subject: [PATCH 11/13] Add myself to file authors. --- sklearn/semi_supervised/label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 42ea6de02274b..b254c5d73da45 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -52,7 +52,7 @@ Non-Parametric Function Induction in Semi-Supervised Learning. 
AISTAT 2005 """ -# Authors: Clay Woolam +# Authors: Clay Woolam , Utkarsh Upadhyay # License: BSD from abc import ABCMeta, abstractmethod From 5e2e3014692276f309682a9a86c26b4988031ca7 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Sat, 29 Jul 2017 14:00:19 +0200 Subject: [PATCH 12/13] Fix flake8 errors. --- sklearn/semi_supervised/label_propagation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index b254c5d73da45..ff3d3a6d368ee 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -52,7 +52,8 @@ Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 """ -# Authors: Clay Woolam , Utkarsh Upadhyay +# Authors: Clay Woolam +# Utkarsh Upadhyay # License: BSD from abc import ABCMeta, abstractmethod @@ -265,7 +266,7 @@ def fit(self, X, y): self.n_iter_ = 0 while self.n_iter_ < self.max_iter: if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: - break + break l_previous = self.label_distributions_ self.label_distributions_ = safe_sparse_dot( From 3dfe11003d63fe11151c2e82fca6af33d050cdc8 Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Thu, 3 Aug 2017 10:51:39 +0200 Subject: [PATCH 13/13] While loop -> for loop. Also, add tests to verify that n_iter_ == max_iter if the warning is raised. 
--- sklearn/semi_supervised/label_propagation.py | 5 ++--- sklearn/semi_supervised/tests/test_label_propagation.py | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index ff3d3a6d368ee..10eebba86f04e 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -263,8 +263,7 @@ def fit(self, X, y): if sparse.isspmatrix(graph_matrix): graph_matrix = graph_matrix.tocsr() - self.n_iter_ = 0 - while self.n_iter_ < self.max_iter: + for self.n_iter_ in range(self.max_iter): if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: break @@ -283,12 +282,12 @@ def fit(self, X, y): # clamp self.label_distributions_ = np.multiply( alpha, self.label_distributions_) + y_static - self.n_iter_ += 1 else: warnings.warn( 'max_iter=%d was reached without convergence.' % self.max_iter, category=ConvergenceWarning ) + self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index aa460a7a72ff1..8cd0cce41d7e9 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -155,9 +155,11 @@ def test_convergence_warning(): y = np.array([0, 1, -1]) mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y)