diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 5af76499bcb39..a8eca8190bdf6 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -135,6 +135,10 @@ Decomposition, manifold learning and clustering wrapped estimator and its parameter. :issue:`9999` by :user:`Marcus Voss ` and `Joel Nothman`_. +- Fixed a bug in :func:`datasets.make_circles`, where no odd number of data + points could be generated. :issue:`10037` by :user:`Christian Braune + `. + Metrics - Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 259c8f1c13ee3..fdde601f2c677 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -585,7 +585,8 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, Parameters ---------- n_samples : int, optional (default=100) - The total number of points generated. + The total number of points generated. If odd, the inner circle will + have one point more than the outer circle. shuffle : bool, optional (default=True) Whether to shuffle the samples. @@ -599,7 +600,7 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, If None, the random number generator is the RandomState instance used by `np.random`. - factor : double < 1 (default=.8) + factor : 0 <= double < 1 (default=.8) Scale factor between inner and outer circle. Returns @@ -611,22 +612,25 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None, The integer labels (0 or 1) for class membership of each sample. """ - if factor > 1 or factor < 0: + if factor >= 1 or factor < 0: raise ValueError("'factor' has to be between 0 and 1.") + n_samples_out = n_samples // 2 + n_samples_in = n_samples - n_samples_out + generator = check_random_state(random_state) - # so as not to have the first point = last point, we add one and then - # remove it.
- linspace = np.linspace(0, 2 * np.pi, n_samples // 2 + 1)[:-1] - outer_circ_x = np.cos(linspace) - outer_circ_y = np.sin(linspace) - inner_circ_x = outer_circ_x * factor - inner_circ_y = outer_circ_y * factor + # endpoint=False drops 2*pi, which would repeat the point at angle 0 + linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False) + linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=False) + outer_circ_x = np.cos(linspace_out) + outer_circ_y = np.sin(linspace_out) + inner_circ_x = np.cos(linspace_in) * factor + inner_circ_y = np.sin(linspace_in) * factor X = np.vstack((np.append(outer_circ_x, inner_circ_x), np.append(outer_circ_y, inner_circ_y))).T - y = np.hstack([np.zeros(n_samples // 2, dtype=np.intp), - np.ones(n_samples // 2, dtype=np.intp)]) + y = np.hstack([np.zeros(n_samples_out, dtype=np.intp), + np.ones(n_samples_in, dtype=np.intp)]) if shuffle: X, y = util_shuffle(X, y, random_state=generator) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 787ffb872dd5a..8b9810489bab6 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -25,6 +25,7 @@ from sklearn.datasets import make_friedman3 from sklearn.datasets import make_low_rank_matrix from sklearn.datasets import make_moons +from sklearn.datasets import make_circles from sklearn.datasets import make_sparse_coded_signal from sklearn.datasets import make_sparse_uncorrelated from sklearn.datasets import make_spd_matrix @@ -385,3 +386,29 @@ def test_make_moons(): dist_sqr = ((x - center) ** 2).sum() assert_almost_equal(dist_sqr, 1.0, err_msg="Point is not on expected unit circle") + + +def test_make_circles(): + factor = 0.3 + + for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]: + # Cover an odd and an even n_samples: make_circles used to build both + # circles from n_samples // 2 points, so odd totals came out one short.
+ X, y = make_circles(n_samples, shuffle=False, noise=None, + factor=factor) + assert_equal(X.shape, (n_samples, 2), "X shape mismatch") + assert_equal(y.shape, (n_samples,), "y shape mismatch") + center = [0.0, 0.0] + for x, label in zip(X, y): + dist_sqr = ((x - center) ** 2).sum() + dist_exp = 1.0 if label == 0 else factor**2 + assert_almost_equal(dist_sqr, dist_exp, + err_msg="Point is not on expected circle") + + assert_equal(X[y == 0].shape, (n_outer, 2), + "Samples not correctly distributed across circles.") + assert_equal(X[y == 1].shape, (n_inner, 2), + "Samples not correctly distributed across circles.") + + assert_raises(ValueError, make_circles, factor=-0.01) + assert_raises(ValueError, make_circles, factor=1.)