From e27160c1e75a6945aa90e072b3598370f59f17eb Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Fri, 3 Feb 2023 19:36:42 -0500 Subject: [PATCH 1/2] DOC Adds `HDBSCAN.dbscan_clustering` section to `plot_hdbscan.py` --- examples/cluster/plot_hdbscan.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py index 14cb9de44cfa1..5947273be4405 100644 --- a/examples/cluster/plot_hdbscan.py +++ b/examples/cluster/plot_hdbscan.py @@ -195,7 +195,6 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax= # ^^^^^^^^^^^^^ # `min_samples` is the number of samples in a neighborhood for a point to # be considered as a core point, including the point itself. - # `min_samples` defaults to `min_cluster_size`. # Similarly to `min_cluster_size`, larger values for `min_samples` increase # the model's robustness to noise, but risks ignoring or discarding @@ -213,3 +212,27 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax= labels = hdb.labels_ plot(X, labels, hdb.probabilities_, param, ax=axes[i]) + +# %% +# `dbscan_clustering` +# ^^^^^^^^^^^^^^^^^^^ +# During `fit`, `HDBSCAN` builds a single-linkage tree which encodes the +# clustering of all points across all values of :class:`~cluster.DBSCAN`'s +# `eps` parameter. +# We can thus plot and evaluate these clusterings efficiently without fully +# recomputing intermediate values such as core-distances, mutual-reachability, +# and the minimum spanning tree. All we need to do is specify the `cut_distance` +# (equivalent to `eps`) we want to cluster with. + +PARAM = ( + {"cut_distance": 0.1}, + {"cut_distance": 0.5}, + {"cut_distance": 1.0}, +) +hdb = HDBSCAN() +hdb.fit(X) +fig, axes = plt.subplots(3, 1, figsize=(10, 12)) +for i, param in enumerate(PARAM): + labels = hdb.dbscan_clustering(**param) + + plot(X, labels, hdb.probabilities_, param, ax=axes[i]) From a05cf662d31192afb4a9dfc33d391537c361b3b5 Mon Sep 17 00:00:00 2001 From: Meekail Zain <34613774+Micky774@users.noreply.github.com> Date: Tue, 7 Feb 2023 11:18:31 -0500 Subject: [PATCH 2/2] Update examples/cluster/plot_hdbscan.py Co-authored-by: Julien Jerphanion --- examples/cluster/plot_hdbscan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py index 5947273be4405..8e4678cdfb134 100644 --- a/examples/cluster/plot_hdbscan.py +++ b/examples/cluster/plot_hdbscan.py @@ -231,7 +231,7 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax= ) hdb = HDBSCAN() hdb.fit(X) -fig, axes = plt.subplots(3, 1, figsize=(10, 12)) +fig, axes = plt.subplots(len(PARAM), 1, figsize=(10, 12)) for i, param in enumerate(PARAM): labels = hdb.dbscan_clustering(**param)