Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[MRG] FIX/TST boundary cases in dbscan #4073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions sklearn/cluster/dbscan_.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',

min_samples : int, optional
The number of samples (or total weight) in a neighborhood for a point
to be considered as a core point.
to be considered as a core point. This number is inclusive of the
core point.

metric : string, or callable
The metric to use when calculating distance between instances in a
Expand Down Expand Up @@ -122,7 +123,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
labels = -np.ones(X.shape[0], dtype=np.int)

# A list of all core samples found.
core_samples = np.flatnonzero(n_neighbors > min_samples)
core_samples = np.flatnonzero(n_neighbors >= min_samples)
index_order = core_samples[random_state.permutation(core_samples.shape[0])]

# label_num is the label given to the new cluster
Expand Down Expand Up @@ -170,7 +171,8 @@ class DBSCAN(BaseEstimator, ClusterMixin):
as in the same neighborhood.
min_samples : int, optional
The number of samples (or total weight) in a neighborhood for a point
to be considered as a core point.
to be considered as a core point. This number is inclusive of the
core point.
metric : string, or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
Expand Down Expand Up @@ -234,8 +236,8 @@ def fit(self, X, y=None, sample_weight=None):
A feature array, or array of distances between samples if
``metric='precomputed'``.
sample_weight : array, shape (n_samples,), optional
Weight of each sample, such that a sample with weight greater
than ``min_samples`` is automatically a core sample; a sample with
Weight of each sample, such that a sample with weight at least
``min_samples`` is automatically a core sample; a sample with
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.
"""
Expand All @@ -260,8 +262,8 @@ def fit_predict(self, X, y=None, sample_weight=None):
A feature array, or array of distances between samples if
``metric='precomputed'``.
sample_weight : array, shape (n_samples,), optional
Weight of each sample, such that a sample with weight greater
than ``min_samples`` is automatically a core sample; a sample with
Weight of each sample, such that a sample with weight at least
``min_samples`` is automatically a core sample; a sample with
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.

Expand Down
37 changes: 25 additions & 12 deletions sklearn/cluster/tests/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_in
from sklearn.utils.testing import assert_not_in
from sklearn.cluster.dbscan_ import DBSCAN
from sklearn.cluster.dbscan_ import dbscan
from sklearn.cluster.tests.common import generate_clustered_data
Expand Down Expand Up @@ -79,11 +81,11 @@ def test_dbscan_sparse():

def test_dbscan_no_core_samples():
rng = np.random.RandomState(0)
X = rng.rand(40, 10)
X = rng.rand(15, 10)
X[X < .8] = 0

for X_ in [X, sparse.csr_matrix(X)]:
db = DBSCAN().fit(X_)
db = DBSCAN(min_samples=50).fit(X_)
assert_array_equal(db.components_, np.empty((0, X_.shape[1])))
assert_array_equal(db.labels_, -1)
assert_equal(db.core_sample_indices_.shape, (0,))
Expand Down Expand Up @@ -185,33 +187,44 @@ def test_pickle():
assert_equal(type(pickle.loads(s)), obj.__class__)


def test_boundaries():
# ensure min_samples is inclusive of core point
core, _ = dbscan([[0], [1]], eps=2, min_samples=2)
assert_in(0, core)
# ensure eps is inclusive of circumference
core, _ = dbscan([[0], [1], [1]], eps=1, min_samples=2)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a stupid question but why do you need [1] twice?

assert_in(0, core)
core, _ = dbscan([[0], [1], [1]], eps=.99, min_samples=2)
assert_not_in(0, core)


def test_weighted_dbscan():
# ensure sample_weight is validated
assert_raises(ValueError, dbscan, [[0], [1]], sample_weight=[2])
assert_raises(ValueError, dbscan, [[0], [1]], sample_weight=[2, 3, 4])

# ensure sample_weight has an effect
assert_array_equal([], dbscan([[0], [1]], sample_weight=None,
min_samples=5)[0])
min_samples=6)[0])
assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 5],
min_samples=5)[0])
min_samples=6)[0])
assert_array_equal([0], dbscan([[0], [1]], sample_weight=[6, 5],
min_samples=5)[0])
min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 6],
min_samples=5)[0])
min_samples=6)[0])

# points within eps of each other:
assert_array_equal([0, 1], dbscan([[0], [1]], eps=1.5,
sample_weight=[5, 1], min_samples=5)[0])
sample_weight=[5, 1], min_samples=6)[0])
# and effect of non-positive and non-integer sample_weight:
assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 0],
eps=1.5, min_samples=5)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[5, 0.1],
eps=1.5, min_samples=5)[0])
eps=1.5, min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[5.9, 0.1],
eps=1.5, min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 0],
eps=1.5, min_samples=5)[0])
eps=1.5, min_samples=6)[0])
assert_array_equal([], dbscan([[0], [1]], sample_weight=[6, -1],
eps=1.5, min_samples=5)[0])
eps=1.5, min_samples=6)[0])

# for non-negative sample_weight, cores should be identical to repetition
rng = np.random.RandomState(42)
Expand Down