From 75762787b1408bb26a76dd49036ad0b1d458ed5a Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Sun, 29 Oct 2017 10:22:28 +0100 Subject: [PATCH 01/13] fixes #10037 --- sklearn/datasets/samples_generator.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 259c8f1c13ee3..7909e26d755f4 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -614,19 +614,22 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, if factor > 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") + n_samples_out = n_samples // 2 + n_samples_in = n_samples - n_samples_out + generator = check_random_state(random_state) - # so as not to have the first point = last point, we add one and then - # remove it. - linspace = np.linspace(0, 2 * np.pi, n_samples // 2 + 1)[:-1] - outer_circ_x = np.cos(linspace) - outer_circ_y = np.sin(linspace) - inner_circ_x = outer_circ_x * factor - inner_circ_y = outer_circ_y * factor + # so as not to have the first point = last point, we set endpoint=False + linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False) + linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=True) + outer_circ_x = np.cos(linspace_out) + outer_circ_y = np.sin(linspace_out) + inner_circ_x = np.cos(linspace_in) * factor + inner_circ_y = np.sin(linspace_in) * factor X = np.vstack((np.append(outer_circ_x, inner_circ_x), np.append(outer_circ_y, inner_circ_y))).T - y = np.hstack([np.zeros(n_samples // 2, dtype=np.intp), - np.ones(n_samples // 2, dtype=np.intp)]) + y = np.hstack([np.zeros(n_samples_out, dtype=np.intp), + np.ones(n_samples_in, dtype=np.intp)]) if shuffle: X, y = util_shuffle(X, y, random_state=generator) From effc6c21ebd9e0f6539daf611b7131953da062de Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Tue, 31 Oct 2017 10:36:41 +0100 Subject: 
[PATCH 02/13] tests for #10037, tests for odd number of samples and whether generated points lie on the expected circles similar to test_make_moons() --- sklearn/datasets/samples_generator.py | 20 ++++++++++--------- .../datasets/tests/test_samples_generator.py | 14 +++++++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 259c8f1c13ee3..a06c706183546 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -613,20 +613,22 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, if factor > 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") + n_samples_out = n_samples // 2 + n_samples_in = n_samples - n_samples_out generator = check_random_state(random_state) - # so as not to have the first point = last point, we add one and then - # remove it. - linspace = np.linspace(0, 2 * np.pi, n_samples // 2 + 1)[:-1] - outer_circ_x = np.cos(linspace) - outer_circ_y = np.sin(linspace) - inner_circ_x = outer_circ_x * factor - inner_circ_y = outer_circ_y * factor + # so as not to have the first point = last point, we set endpoint=False + linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False) + linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=True) + outer_circ_x = np.cos(linspace_out) + outer_circ_y = np.sin(linspace_out) + inner_circ_x = np.cos(linspace_in) * factor + inner_circ_y = np.sin(linspace_in) * factor X = np.vstack((np.append(outer_circ_x, inner_circ_x), np.append(outer_circ_y, inner_circ_y))).T - y = np.hstack([np.zeros(n_samples // 2, dtype=np.intp), - np.ones(n_samples // 2, dtype=np.intp)]) + y = np.hstack([np.zeros(n_samples_out, dtype=np.intp), + np.ones(n_samples_in, dtype=np.intp)]) if shuffle: X, y = util_shuffle(X, y, random_state=generator) diff --git a/sklearn/datasets/tests/test_samples_generator.py 
b/sklearn/datasets/tests/test_samples_generator.py index 787ffb872dd5a..67779e3e30526 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -25,6 +25,7 @@ from sklearn.datasets import make_friedman3 from sklearn.datasets import make_low_rank_matrix from sklearn.datasets import make_moons +from sklearn.datasets import make_circles from sklearn.datasets import make_sparse_coded_signal from sklearn.datasets import make_sparse_uncorrelated from sklearn.datasets import make_spd_matrix @@ -385,3 +386,16 @@ def test_make_moons(): dist_sqr = ((x - center) ** 2).sum() assert_almost_equal(dist_sqr, 1.0, err_msg="Point is not on expected unit circle") + + +def test_make_circles(): + f = 0.3 + X, y = make_circles(7, shuffle=False, noise=None, factor=f) + assert_equal(X.shape, (7, 2), "X shape mismatch") + assert_equal(y.shape, (7,), "y shape mismatch") + center = [0.0, 0.0] + for x, label in zip(X, y): + dist_sqr = ((x - center) ** 2).sum() + dist_exp = 1.0 if label == 0 else f**2 + assert_almost_equal(dist_sqr, dist_exp, + err_msg="Point is not on expected circle") From 18ee5d8a2ed9fccdf23e69ad6f533e2593b6b712 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Tue, 31 Oct 2017 10:56:15 +0100 Subject: [PATCH 03/13] nasty doubled lines of code removed --- sklearn/datasets/samples_generator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 782ebb1faf083..d1086791201df 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -613,9 +613,7 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, if factor > 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") - n_samples_out = n_samples // 2 - n_samples_in = n_samples - n_samples_out - + n_samples_out = n_samples // 2 n_samples_in = n_samples - n_samples_out From 
32b2ffbfc524daf9813e86de66ec4a75548527dc Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Tue, 31 Oct 2017 16:05:31 +0100 Subject: [PATCH 04/13] changes according to comments in PR --- sklearn/datasets/samples_generator.py | 4 ++-- sklearn/datasets/tests/test_samples_generator.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index d1086791201df..1b110d25ccb22 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -613,14 +613,14 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, if factor > 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") - + n_samples_out = n_samples // 2 n_samples_in = n_samples - n_samples_out generator = check_random_state(random_state) # so as not to have the first point = last point, we set endpoint=False linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False) - linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=True) + linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=False) outer_circ_x = np.cos(linspace_out) outer_circ_y = np.sin(linspace_out) inner_circ_x = np.cos(linspace_in) * factor diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 67779e3e30526..59c52299e901a 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -399,3 +399,10 @@ def test_make_circles(): dist_exp = 1.0 if label == 0 else f**2 assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") + + X, y = make_circles(10, shuffle=False, noise=None) + assert_equal(X.shape, (10, 2), "X shape mismatch") + assert_equal(y.shape, (10,), "y shape mismatch") + + assert_equal(X[y == 0].shape, (5, 2), + err_msg="Samples not correctly distributed across circles.") From 
4c99154fb842658a28ca3b3826ffcbf3a29705a8 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Tue, 31 Oct 2017 16:15:02 +0100 Subject: [PATCH 05/13] assert_equal obviously has a different signature. --- sklearn/datasets/tests/test_samples_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 59c52299e901a..79fec258de383 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -405,4 +405,4 @@ def test_make_circles(): assert_equal(y.shape, (10,), "y shape mismatch") assert_equal(X[y == 0].shape, (5, 2), - err_msg="Samples not correctly distributed across circles.") + "Samples not correctly distributed across circles.") From 8a86067a512dfe78ff51d806f6126c3ac3cb94e1 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Fri, 3 Nov 2017 05:43:48 +0100 Subject: [PATCH 06/13] Adjusted documentation for make_circles Added a test to check if really only factors in (0, 1) (excluding borders) are accepted Adjusted factor check (1.0 was accepted before, though doc said otherwise) --- sklearn/datasets/samples_generator.py | 7 ++++--- sklearn/datasets/tests/test_samples_generator.py | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 1b110d25ccb22..fdde601f2c677 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -585,7 +585,8 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, Parameters ---------- n_samples : int, optional (default=100) - The total number of points generated. + The total number of points generated. If odd, the inner circle will + have one point more than the outer circle. shuffle : bool, optional (default=True) Whether to shuffle the samples. 
@@ -599,7 +600,7 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, If None, the random number generator is the RandomState instance used by `np.random`. - factor : double < 1 (default=.8) + factor : 0 < double < 1 (default=.8) Scale factor between inner and outer circle. Returns @@ -611,7 +612,7 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, The integer labels (0 or 1) for class membership of each sample. """ - if factor > 1 or factor < 0: + if factor >= 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") n_samples_out = n_samples // 2 diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 79fec258de383..811d6b78bf5fa 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -400,9 +400,10 @@ def test_make_circles(): assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") - X, y = make_circles(10, shuffle=False, noise=None) - assert_equal(X.shape, (10, 2), "X shape mismatch") - assert_equal(y.shape, (10,), "y shape mismatch") - - assert_equal(X[y == 0].shape, (5, 2), + assert_equal(X[y == 0].shape, (3, 2), + "Samples not correctly distributed across circles.") + assert_equal(X[y == 1].shape, (4, 2), "Samples not correctly distributed across circles.") + + assert_raises(ValueError, make_circles, factor=-0.01) + assert_raises(ValueError, make_circles, factor=1.) From 9ce24e47fdc528cd650fbe017ecbc5fc4658d8ca Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Fri, 3 Nov 2017 05:51:53 +0100 Subject: [PATCH 07/13] added entry under "Decomposition, ..." as another datasets-related bug was already mentioned in there. 
--- doc/whats_new/_contributors.rst | 2 ++ doc/whats_new/v0.20.rst | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst index dfbc319da88f4..370362d67292f 100644 --- a/doc/whats_new/_contributors.rst +++ b/doc/whats_new/_contributors.rst @@ -141,3 +141,5 @@ .. _Neeraj Gangwar: http://neerajgangwar.in .. _Arthur Mensch: https://amensch.fr + +.. _Christian Braune: https://github.com/christianbraune79 diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 5af76499bcb39..a8eca8190bdf6 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -135,6 +135,10 @@ Decomposition, manifold learning and clustering wrapped estimator and its parameter. :issue:`9999` by :user:`Marcus Voss ` and `Joel Nothman`_. +- Fixed a bug in :func:`datasets.make_circles`, where no odd number of data + points could be generated. :issue:`10037` by :user:`Christian Braune + <christianbraune79>`_. + Metrics - Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with From 0f1321ad6e8ddd028587bff20b6ed9d162b1da3f Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Thu, 9 Nov 2017 20:21:48 +0100 Subject: [PATCH 08/13] removed wrong entry --- doc/whats_new/_contributors.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst index 370362d67292f..dfbc319da88f4 100644 --- a/doc/whats_new/_contributors.rst +++ b/doc/whats_new/_contributors.rst @@ -141,5 +141,3 @@ .. _Neeraj Gangwar: http://neerajgangwar.in .. _Arthur Mensch: https://amensch.fr - -.. 
_Christian Braune: https://github.com/christianbraune79 From ad1af5db9cd3502028ccb2ec7caa460b1ec34d30 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Thu, 9 Nov 2017 20:26:32 +0100 Subject: [PATCH 09/13] all tests for odd and even case --- .../datasets/tests/test_samples_generator.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 811d6b78bf5fa..6bc447495d421 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -390,20 +390,22 @@ def test_make_moons(): def test_make_circles(): f = 0.3 - X, y = make_circles(7, shuffle=False, noise=None, factor=f) - assert_equal(X.shape, (7, 2), "X shape mismatch") - assert_equal(y.shape, (7,), "y shape mismatch") - center = [0.0, 0.0] - for x, label in zip(X, y): - dist_sqr = ((x - center) ** 2).sum() - dist_exp = 1.0 if label == 0 else f**2 - assert_almost_equal(dist_sqr, dist_exp, - err_msg="Point is not on expected circle") - - assert_equal(X[y == 0].shape, (3, 2), - "Samples not correctly distributed across circles.") - assert_equal(X[y == 1].shape, (4, 2), - "Samples not correctly distributed across circles.") + + for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: + X, y = make_circles(n_samples, shuffle=False, noise=None, factor=f) + assert_equal(X.shape, (n_samples, 2), "X shape mismatch") + assert_equal(y.shape, (n_samples,), "y shape mismatch") + center = [0.0, 0.0] + for x, label in zip(X, y): + dist_sqr = ((x - center) ** 2).sum() + dist_exp = 1.0 if label == 0 else f**2 + assert_almost_equal(dist_sqr, dist_exp, + err_msg="Point is not on expected circle") + + assert_equal(X[y == 0].shape, (n_outer, 2), + "Samples not correctly distributed across circles.") + assert_equal(X[y == 1].shape, (n_inner, 2), + "Samples not correctly distributed across circles.") assert_raises(ValueError, make_circles, 
factor=-0.01) assert_raises(ValueError, make_circles, factor=1.) From ad07946e2f1121db205dda55b27cd29c8038df96 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Thu, 9 Nov 2017 20:32:39 +0100 Subject: [PATCH 10/13] added final comment --- sklearn/datasets/tests/test_samples_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 6bc447495d421..cbd889f496fdb 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -391,6 +391,7 @@ def test_make_moons(): def test_make_circles(): f = 0.3 + # Testing odd and even case, because in the past make_circles always created an even number of samples. for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: X, y = make_circles(n_samples, shuffle=False, noise=None, factor=f) assert_equal(X.shape, (n_samples, 2), "X shape mismatch") From 6126a5c19ca7e5b684ccc04c6ca2e10bad563784 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Thu, 9 Nov 2017 22:03:05 +0100 Subject: [PATCH 11/13] '107 > 79' fixed --- sklearn/datasets/tests/test_samples_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index cbd889f496fdb..b0671fb5a198a 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -391,7 +391,8 @@ def test_make_moons(): def test_make_circles(): f = 0.3 - # Testing odd and even case, because in the past make_circles always created an even number of samples. + # Testing odd and even case, because in the past make_circles always + # created an even number of samples. 
for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: X, y = make_circles(n_samples, shuffle=False, noise=None, factor=f) assert_equal(X.shape, (n_samples, 2), "X shape mismatch") From 2a62edcf183666a02c15b4dc71480a8868114937 Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Fri, 10 Nov 2017 21:25:25 +0100 Subject: [PATCH 12/13] refactoring f into factor --- sklearn/datasets/tests/test_samples_generator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index b0671fb5a198a..17dda9ec32fd6 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -389,18 +389,18 @@ def test_make_moons(): def test_make_circles(): - f = 0.3 + factor = 0.3 - # Testing odd and even case, because in the past make_circles always - # created an even number of samples. for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: - X, y = make_circles(n_samples, shuffle=False, noise=None, factor=f) + # Testing odd and even case, because in the past make_circles always + # created an even number of samples. 
+ X, y = make_circles(n_samples, shuffle=False, noise=None, factor=factor) assert_equal(X.shape, (n_samples, 2), "X shape mismatch") assert_equal(y.shape, (n_samples,), "y shape mismatch") center = [0.0, 0.0] for x, label in zip(X, y): dist_sqr = ((x - center) ** 2).sum() - dist_exp = 1.0 if label == 0 else f**2 + dist_exp = 1.0 if label == 0 else factor**2 assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") From e19cecc2f994081e6c9e5b9521e53ad125eee6db Mon Sep 17 00:00:00 2001 From: Christian Braune Date: Fri, 10 Nov 2017 21:44:46 +0100 Subject: [PATCH 13/13] pep8 --- sklearn/datasets/tests/test_samples_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 17dda9ec32fd6..8b9810489bab6 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -394,7 +394,8 @@ def test_make_circles(): for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: # Testing odd and even case, because in the past make_circles always # created an even number of samples. - X, y = make_circles(n_samples, shuffle=False, noise=None, factor=factor) + X, y = make_circles(n_samples, shuffle=False, noise=None, + factor=factor) assert_equal(X.shape, (n_samples, 2), "X shape mismatch") assert_equal(y.shape, (n_samples,), "y shape mismatch") center = [0.0, 0.0]