diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index cba5e8dfd2900..c5dbbed17847e 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -30,7 +30,6 @@
     "ColumnTransformer",
     "ComplementNB",
     "CountVectorizer",
-    "DBSCAN",
     "DecisionTreeClassifier",
     "DecisionTreeRegressor",
     "DictVectorizer",
diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py
index 097202759ba90..35c17bbae061a 100644
--- a/sklearn/cluster/_dbscan.py
+++ b/sklearn/cluster/_dbscan.py
@@ -183,7 +183,7 @@ class DBSCAN(ClusterMixin, BaseEstimator):
         The number of samples (or total weight) in a neighborhood for a point
         to be considered as a core point. This includes the point itself.

-    metric : string, or callable, default='euclidean'
+    metric : str, or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string or callable, it must be one of
         the options allowed by :func:`sklearn.metrics.pairwise_distances` for
@@ -239,18 +239,6 @@ class DBSCAN(ClusterMixin, BaseEstimator):

         .. versionadded:: 0.24

-    Examples
-    --------
-    >>> from sklearn.cluster import DBSCAN
-    >>> import numpy as np
-    >>> X = np.array([[1, 2], [2, 2], [2, 3],
-    ...               [8, 7], [8, 8], [25, 80]])
-    >>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)
-    >>> clustering.labels_
-    array([ 0,  0,  0,  1,  1, -1])
-    >>> clustering
-    DBSCAN(eps=3, min_samples=2)
-
     See Also
     --------
     OPTICS : A similar clustering at multiple values of eps. Our implementation
@@ -289,6 +277,18 @@ class DBSCAN(ClusterMixin, BaseEstimator):
     Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).
     DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.
     ACM Transactions on Database Systems (TODS), 42(3), 19.
+
+    Examples
+    --------
+    >>> from sklearn.cluster import DBSCAN
+    >>> import numpy as np
+    >>> X = np.array([[1, 2], [2, 2], [2, 3],
+    ...               [8, 7], [8, 8], [25, 80]])
+    >>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)
+    >>> clustering.labels_
+    array([ 0,  0,  0,  1,  1, -1])
+    >>> clustering
+    DBSCAN(eps=3, min_samples=2)
     """

     def __init__(
@@ -323,19 +323,19 @@ def fit(self, X, y=None, sample_weight=None):
             ``metric='precomputed'``. If a sparse matrix is provided, it will
             be converted into a sparse ``csr_matrix``.

+        y : Ignored
+            Not used, present here for API consistency by convention.
+
         sample_weight : array-like of shape (n_samples,), default=None
             Weight of each sample, such that a sample with a weight of at
             least ``min_samples`` is by itself a core sample; a sample with a
             negative weight may inhibit its eps-neighbor from being core.
             Note that weights are absolute, and default to 1.

-        y : Ignored
-            Not used, present here for API consistency by convention.
-
         Returns
         -------
-        self
-
+        self : object
+            Returns a fitted instance of self.
         """
         X = self._validate_data(X, accept_sparse="csr")

@@ -394,8 +394,7 @@ def fit(self, X, y=None, sample_weight=None):
         return self

     def fit_predict(self, X, y=None, sample_weight=None):
-        """Perform DBSCAN clustering from features or distance matrix,
-        and return cluster labels.
+        """Compute clusters from a data or distance matrix and predict labels.

         Parameters
         ----------
@@ -405,15 +404,15 @@ def fit_predict(self, X, y=None, sample_weight=None):
             ``metric='precomputed'``. If a sparse matrix is provided, it will
             be converted into a sparse ``csr_matrix``.

+        y : Ignored
+            Not used, present here for API consistency by convention.
+
         sample_weight : array-like of shape (n_samples,), default=None
             Weight of each sample, such that a sample with a weight of at
             least ``min_samples`` is by itself a core sample; a sample with a
             negative weight may inhibit its eps-neighbor from being core.
             Note that weights are absolute, and default to 1.

-        y : Ignored
-            Not used, present here for API consistency by convention.
-
         Returns
         -------
         labels : ndarray of shape (n_samples,)
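
A minimal sketch (not part of the patch above) exercising the ``sample_weight`` semantics these docstrings describe: a sample whose weight reaches ``min_samples`` is a core sample on its own. It reuses the toy data from the docstring Examples section; the weight values are illustrative, not taken from the diff.

    import numpy as np
    from sklearn.cluster import DBSCAN

    # Same toy data as the docstring Examples section.
    X = np.array([[1, 2], [2, 2], [2, 3],
                  [8, 7], [8, 8], [25, 80]])

    # With the default unit weights, the isolated point [25, 80] has no
    # eps-neighbors, so fit_predict labels it as noise (-1), as in the
    # docstring example.
    print(DBSCAN(eps=3, min_samples=2).fit_predict(X))

    # Giving that point a weight >= min_samples makes it a core sample by
    # itself, so it should form its own singleton cluster instead of noise.
    weights = np.array([1, 1, 1, 1, 1, 2])
    print(DBSCAN(eps=3, min_samples=2).fit_predict(X, sample_weight=weights))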