From 9fd0ec06ea2d3d82ec190c08ad5540cfd1f5642d Mon Sep 17 00:00:00 2001 From: John Hendricks Date: Fri, 19 Sep 2025 23:49:59 -0400 Subject: [PATCH] Consolidated density-based clustering examples Co-authored-by: John Hendricks Co-authored-by: Dominic Austria --- examples/cluster/plot_dbscan.py | 132 ---------- .../cluster/plot_dbscan_hdbscan_optics.py | 156 +++++++++++ examples/cluster/plot_hdbscan.py | 249 ------------------ examples/cluster/plot_optics.py | 108 -------- 4 files changed, 156 insertions(+), 489 deletions(-) delete mode 100644 examples/cluster/plot_dbscan.py create mode 100644 examples/cluster/plot_dbscan_hdbscan_optics.py delete mode 100644 examples/cluster/plot_hdbscan.py delete mode 100644 examples/cluster/plot_optics.py diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py deleted file mode 100644 index 27a5db29c4191..0000000000000 --- a/examples/cluster/plot_dbscan.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -=================================== -Demo of DBSCAN clustering algorithm -=================================== - -DBSCAN (Density-Based Spatial Clustering of Applications with Noise) finds core -samples in regions of high density and expands clusters from them. This -algorithm is good for data which contains clusters of similar density. - -See the :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py` example -for a demo of different clustering algorithms on 2D datasets. - -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -# %% -# Data generation -# --------------- -# -# We use :class:`~sklearn.datasets.make_blobs` to create 3 synthetic clusters. - -from sklearn.datasets import make_blobs -from sklearn.preprocessing import StandardScaler - -centers = [[1, 1], [-1, -1], [1, -1]] -X, labels_true = make_blobs( - n_samples=750, centers=centers, cluster_std=0.4, random_state=0 -) - -X = StandardScaler().fit_transform(X) - -# %% -# We can visualize the resulting data: - -import matplotlib.pyplot as plt - -plt.scatter(X[:, 0], X[:, 1]) -plt.show() - -# %% -# Compute DBSCAN -# -------------- -# -# One can access the labels assigned by :class:`~sklearn.cluster.DBSCAN` using -# the `labels_` attribute. Noisy samples are given the label :math:`-1`. - -import numpy as np - -from sklearn import metrics -from sklearn.cluster import DBSCAN - -db = DBSCAN(eps=0.3, min_samples=10).fit(X) -labels = db.labels_ - -# Number of clusters in labels, ignoring noise if present. -n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) -n_noise_ = list(labels).count(-1) - -print("Estimated number of clusters: %d" % n_clusters_) -print("Estimated number of noise points: %d" % n_noise_) - -# %% -# Clustering algorithms are fundamentally unsupervised learning methods. -# However, since :class:`~sklearn.datasets.make_blobs` gives access to the true -# labels of the synthetic clusters, it is possible to use evaluation metrics -# that leverage this "supervised" ground truth information to quantify the -# quality of the resulting clusters. Examples of such metrics are the -# homogeneity, completeness, V-measure, Rand-Index, Adjusted Rand-Index and -# Adjusted Mutual Information (AMI). -# -# If the ground truth labels are not known, evaluation can only be performed -# using the model results itself. In that case, the Silhouette Coefficient comes -# in handy. -# -# For more information, see the -# :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py` -# example or the :ref:`clustering_evaluation` module. 
-
-print(f"Homogeneity: {metrics.homogeneity_score(labels_true, labels):.3f}")
-print(f"Completeness: {metrics.completeness_score(labels_true, labels):.3f}")
-print(f"V-measure: {metrics.v_measure_score(labels_true, labels):.3f}")
-print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(labels_true, labels):.3f}")
-print(
-    "Adjusted Mutual Information:"
-    f" {metrics.adjusted_mutual_info_score(labels_true, labels):.3f}"
-)
-print(f"Silhouette Coefficient: {metrics.silhouette_score(X, labels):.3f}")
-
-# %%
-# Plot results
-# ------------
-#
-# Core samples (large dots) and non-core samples (small dots) are color-coded
-# according to the assigned cluster. Samples tagged as noise are represented in
-# black.
-
-unique_labels = set(labels)
-core_samples_mask = np.zeros_like(labels, dtype=bool)
-core_samples_mask[db.core_sample_indices_] = True
-
-colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
-for k, col in zip(unique_labels, colors):
-    if k == -1:
-        # Black used for noise.
-        col = [0, 0, 0, 1]
-
-    class_member_mask = labels == k
-
-    xy = X[class_member_mask & core_samples_mask]
-    plt.plot(
-        xy[:, 0],
-        xy[:, 1],
-        "o",
-        markerfacecolor=tuple(col),
-        markeredgecolor="k",
-        markersize=14,
-    )
-
-    xy = X[class_member_mask & ~core_samples_mask]
-    plt.plot(
-        xy[:, 0],
-        xy[:, 1],
-        "o",
-        markerfacecolor=tuple(col),
-        markeredgecolor="k",
-        markersize=6,
-    )
-
-plt.title(f"Estimated number of clusters: {n_clusters_}")
-plt.show()
diff --git a/examples/cluster/plot_dbscan_hdbscan_optics.py b/examples/cluster/plot_dbscan_hdbscan_optics.py
new file mode 100644
index 0000000000000..16ace795fd96d
--- /dev/null
+++ b/examples/cluster/plot_dbscan_hdbscan_optics.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+"""
+=====================================================
+Demo of DBSCAN, HDBSCAN, OPTICS clustering algorithms
+=====================================================
+.. currentmodule:: sklearn
+
+DBSCAN, HDBSCAN, and OPTICS are density-based clustering algorithms,
+meaning they leverage regional variations in density to identify
+meaningful clusters. This demo begins with DBSCAN and then moves on to
+HDBSCAN and OPTICS to illustrate the limitations of DBSCAN that the
+latter two algorithms address.
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+# %%
+# Helper Functions
+# ----------------
+#
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax=None):
+    if ax is None:
+        _, ax = plt.subplots(figsize=(10, 4))
+    labels = labels if labels is not None else np.ones(X.shape[0])
+    probabilities = probabilities if probabilities is not None else np.ones(X.shape[0])
+    # Black is reserved for noise points (label -1); clusters get colormap colors.
+    unique_labels = set(labels)
+
+    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
+    # The probability of a point belonging to its labeled cluster
+    # determines the transparency of its marker.
+    proba_map = {idx: probabilities[idx] for idx in range(len(labels))}
+    for k, col in zip(unique_labels, colors):
+        if k == -1:
+            # Black used for noise.
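+            # Noise samples are also drawn with "x" markers and a fixed
+            # transparency in the loop below, so they stand out from
+            # cluster members.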
+            col = [0, 0, 0, 1]
+
+        class_index = np.where(labels == k)[0]
+        for ci in class_index:
+            ax.plot(
+                X[ci, 0],
+                X[ci, 1],
+                "x" if k == -1 else "o",
+                markerfacecolor=tuple(col),
+                markeredgecolor="black",
+                alpha=0.5 if k == -1 else 0.1 + 0.9 * proba_map[ci],
+            )
+    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+    preamble = "True" if ground_truth else "Estimated"
+    title = f"{preamble} number of clusters: {n_clusters_}"
+    if parameters is not None:
+        parameters_str = ", ".join(f"{k}={v}" for k, v in parameters.items())
+        title += f" | {parameters_str}"
+    ax.set_title(title)
+    plt.tight_layout()
+
+
+# %%
+# Dataset
+# -------
+#
+# Consider the following dataset with four clusters. An ideal clustering
+# algorithm should recover all four clusters without any prior
+# information.
+
+from sklearn.datasets import make_blobs
+
+centers = [[-1, -1], [-1, 1], [3, 3], [3, -3]]
+X, labels_true = make_blobs(
+    n_samples=750, centers=centers, cluster_std=[0.2, 0.1, 0.6, 0.6], random_state=0
+)
+
+plot(X, labels=labels_true, ground_truth=True)
+# %%
+# DBSCAN
+# ------
+# DBSCAN identifies clusters by finding "core points", i.e. samples that
+# have at least a minimum number of neighbors within a given distance.
+# These two parameters are called `min_samples` and `eps` in
+# scikit-learn. Typically, they are not known ahead of time and require
+# tuning. More details regarding the algorithm and its implementation
+# can be found in the :ref:`User Guide <dbscan>`.
+#
+# Using `eps=0.2` and `min_samples=10` on the example dataset, DBSCAN
+# correctly identifies the two left-most clusters but fails for those
+# on the right. This illustrates the limitation of a single, global
+# choice of `eps` and `min_samples` when the density varies from
+# cluster to cluster. A key advantage of HDBSCAN and OPTICS over DBSCAN
+# is their ability to identify clusters across varying density
+# thresholds.
+from sklearn.cluster import DBSCAN
+
+eps = 0.2
+min_samples = 10
+
+db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
+plot(X, db.labels_, parameters={"eps": eps, "min_samples": min_samples})
+
+# %%
+# HDBSCAN
+# -------
+# HDBSCAN builds upon DBSCAN by detecting clusters at varying densities.
+# It does so by building a hierarchy over the mutual reachability
+# distances between points and extracting clusters from that hierarchy,
+# rather than relying on a single distance threshold. The full details
+# of this algorithm can be found in the :ref:`User Guide <hdbscan>`.
+#
+# Using the same dataset, HDBSCAN successfully identifies all four
+# clusters despite the variation in density. Unlike DBSCAN and OPTICS,
+# it also reports the probability with which each sample belongs to its
+# assigned cluster, which is demonstrated below by scaling each data
+# point's transparency by its probability.
+
+from sklearn.cluster import HDBSCAN
+
+hdb = HDBSCAN(min_samples=min_samples, copy=False).fit(X)
+plot(X, hdb.labels_, hdb.probabilities_)
+
+# %%
+# OPTICS
+# ------
+# Like HDBSCAN, OPTICS can be viewed as a generalization of DBSCAN: it
+# replaces the single `eps` threshold with a range of values. Unlike
+# HDBSCAN, however, OPTICS orders the samples by reachability distance
+# and extracts clusters from the resulting reachability plot (by
+# default with the Xi method). More details can be found in the
+# :ref:`User Guide <optics>`.
+#
+from sklearn.cluster import OPTICS, cluster_optics_dbscan
+
+optics = OPTICS(min_samples=min_samples, min_cluster_size=0.1).fit(X)
+plot(X, optics.labels_)
+
+# %%
+# Generalizations of DBSCAN
+# -------------------------
+#
+# HDBSCAN and OPTICS can be viewed as generalizing DBSCAN to a range of
+# densities instead of a single fixed value. 
This is illustrated below, where +# both algorithms yield similar clusters as DBSCAN when epsilon is +# fixed. + +label_eps = cluster_optics_dbscan( + reachability=optics.reachability_, + core_distances=optics.core_distances_, + ordering=optics.ordering_, + eps=eps, +) + +plot(X, label_eps) + +# %% +label_eps = hdb.dbscan_clustering(eps) +plot(X, label_eps) diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py deleted file mode 100644 index 2d191fbf30708..0000000000000 --- a/examples/cluster/plot_hdbscan.py +++ /dev/null @@ -1,249 +0,0 @@ -# -*- coding: utf-8 -*- -""" -==================================== -Demo of HDBSCAN clustering algorithm -==================================== -.. currentmodule:: sklearn - -In this demo we will take a look at :class:`cluster.HDBSCAN` from the -perspective of generalizing the :class:`cluster.DBSCAN` algorithm. -We'll compare both algorithms on specific datasets. Finally we'll evaluate -HDBSCAN's sensitivity to certain hyperparameters. - -We first define a couple utility functions for convenience. -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -# %% -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.cluster import DBSCAN, HDBSCAN -from sklearn.datasets import make_blobs - - -def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax=None): - if ax is None: - _, ax = plt.subplots(figsize=(10, 4)) - labels = labels if labels is not None else np.ones(X.shape[0]) - probabilities = probabilities if probabilities is not None else np.ones(X.shape[0]) - # Black removed and is used for noise instead. - unique_labels = set(labels) - colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))] - # The probability of a point belonging to its labeled cluster determines - # the size of its marker - proba_map = {idx: probabilities[idx] for idx in range(len(labels))} - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - - class_index = (labels == k).nonzero()[0] - for ci in class_index: - ax.plot( - X[ci, 0], - X[ci, 1], - "x" if k == -1 else "o", - markerfacecolor=tuple(col), - markeredgecolor="k", - markersize=4 if k == -1 else 1 + 5 * proba_map[ci], - ) - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - preamble = "True" if ground_truth else "Estimated" - title = f"{preamble} number of clusters: {n_clusters_}" - if parameters is not None: - parameters_str = ", ".join(f"{k}={v}" for k, v in parameters.items()) - title += f" | {parameters_str}" - ax.set_title(title) - plt.tight_layout() - - -# %% -# Generate sample data -# -------------------- -# One of the greatest advantages of HDBSCAN over DBSCAN is its out-of-the-box -# robustness. It's especially remarkable on heterogeneous mixtures of data. -# Like DBSCAN, it can model arbitrary shapes and distributions, however unlike -# DBSCAN it does not require specification of an arbitrary and sensitive -# `eps` hyperparameter. -# -# For example, below we generate a dataset from a mixture of three bi-dimensional -# and isotropic Gaussian distributions. 
-centers = [[1, 1], [-1, -1], [1.5, -1.5]] -X, labels_true = make_blobs( - n_samples=750, centers=centers, cluster_std=[0.4, 0.1, 0.75], random_state=0 -) -plot(X, labels=labels_true, ground_truth=True) -# %% -# Scale Invariance -# ----------------- -# It's worth remembering that, while DBSCAN provides a default value for `eps` -# parameter, it hardly has a proper default value and must be tuned for the -# specific dataset at use. -# -# As a simple demonstration, consider the clustering for a `eps` value tuned -# for one dataset, and clustering obtained with the same value but applied to -# rescaled versions of the dataset. -fig, axes = plt.subplots(3, 1, figsize=(10, 12)) -dbs = DBSCAN(eps=0.3) -for idx, scale in enumerate([1, 0.5, 3]): - dbs.fit(X * scale) - plot(X * scale, dbs.labels_, parameters={"scale": scale, "eps": 0.3}, ax=axes[idx]) - -# %% -# Indeed, in order to maintain the same results we would have to scale `eps` by -# the same factor. -fig, axis = plt.subplots(1, 1, figsize=(12, 5)) -dbs = DBSCAN(eps=0.9).fit(3 * X) -plot(3 * X, dbs.labels_, parameters={"scale": 3, "eps": 0.9}, ax=axis) -# %% -# While standardizing data (e.g. using -# :class:`sklearn.preprocessing.StandardScaler`) helps mitigate this problem, -# great care must be taken to select the appropriate value for `eps`. -# -# HDBSCAN is much more robust in this sense: HDBSCAN can be seen as -# clustering over all possible values of `eps` and extracting the best -# clusters from all possible clusters (see :ref:`User Guide `). -# One immediate advantage is that HDBSCAN is scale-invariant. -fig, axes = plt.subplots(3, 1, figsize=(10, 12)) -hdb = HDBSCAN(copy=True) -for idx, scale in enumerate([1, 0.5, 3]): - hdb.fit(X * scale) - plot( - X * scale, - hdb.labels_, - hdb.probabilities_, - ax=axes[idx], - parameters={"scale": scale}, - ) -# %% -# Multi-Scale Clustering -# ---------------------- -# HDBSCAN is much more than scale invariant though -- it is capable of -# multi-scale clustering, which accounts for clusters with varying density. -# Traditional DBSCAN assumes that any potential clusters are homogeneous in -# density. HDBSCAN is free from such constraints. To demonstrate this we -# consider the following dataset -centers = [[-0.85, -0.85], [-0.85, 0.85], [3, 3], [3, -3]] -X, labels_true = make_blobs( - n_samples=750, centers=centers, cluster_std=[0.2, 0.35, 1.35, 1.35], random_state=0 -) -plot(X, labels=labels_true, ground_truth=True) - -# %% -# This dataset is more difficult for DBSCAN due to the varying densities and -# spatial separation: -# -# - If `eps` is too large then we risk falsely clustering the two dense -# clusters as one since their mutual reachability will extend -# clusters. -# - If `eps` is too small, then we risk fragmenting the sparser clusters -# into many false clusters. -# -# Not to mention this requires manually tuning choices of `eps` until we -# find a tradeoff that we are comfortable with. -fig, axes = plt.subplots(2, 1, figsize=(10, 8)) -params = {"eps": 0.7} -dbs = DBSCAN(**params).fit(X) -plot(X, dbs.labels_, parameters=params, ax=axes[0]) -params = {"eps": 0.3} -dbs = DBSCAN(**params).fit(X) -plot(X, dbs.labels_, parameters=params, ax=axes[1]) - -# %% -# To properly cluster the two dense clusters, we would need a smaller value of -# epsilon, however at `eps=0.3` we are already fragmenting the sparse clusters, -# which would only become more severe as we decrease epsilon. 
Indeed it seems -# that DBSCAN is incapable of simultaneously separating the two dense clusters -# while preventing the sparse clusters from fragmenting. Let's compare with -# HDBSCAN. -hdb = HDBSCAN(copy=True).fit(X) -plot(X, hdb.labels_, hdb.probabilities_) - -# %% -# HDBSCAN is able to adapt to the multi-scale structure of the dataset without -# requiring parameter tuning. While any sufficiently interesting dataset will -# require tuning, this case demonstrates that HDBSCAN can yield qualitatively -# better classes of clusterings without users' intervention which are -# inaccessible via DBSCAN. - -# %% -# Hyperparameter Robustness -# ------------------------- -# Ultimately tuning will be an important step in any real world application, so -# let's take a look at some of the most important hyperparameters for HDBSCAN. -# While HDBSCAN is free from the `eps` parameter of DBSCAN, it does still have -# some hyperparameters like `min_cluster_size` and `min_samples` which tune its -# results regarding density. We will however see that HDBSCAN is relatively robust -# to various real world examples thanks to those parameters whose clear meaning -# helps tuning them. -# -# `min_cluster_size` -# ^^^^^^^^^^^^^^^^^^ -# `min_cluster_size` is the minimum number of samples in a group for that -# group to be considered a cluster. -# -# Clusters smaller than the ones of this size will be left as noise. -# The default value is 5. This parameter is generally tuned to -# larger values as needed. Smaller values will likely to lead to results with -# fewer points labeled as noise. However values which too small will lead to -# false sub-clusters being picked up and preferred. Larger values tend to be -# more robust with respect to noisy datasets, e.g. high-variance clusters with -# significant overlap. - -PARAM = ({"min_cluster_size": 5}, {"min_cluster_size": 3}, {"min_cluster_size": 25}) -fig, axes = plt.subplots(3, 1, figsize=(10, 12)) -for i, param in enumerate(PARAM): - hdb = HDBSCAN(copy=True, **param).fit(X) - labels = hdb.labels_ - - plot(X, labels, hdb.probabilities_, param, ax=axes[i]) - -# %% -# `min_samples` -# ^^^^^^^^^^^^^ -# `min_samples` is the number of samples in a neighborhood for a point to -# be considered as a core point, including the point itself. -# `min_samples` defaults to `min_cluster_size`. -# Similarly to `min_cluster_size`, larger values for `min_samples` increase -# the model's robustness to noise, but risks ignoring or discarding -# potentially valid but small clusters. -# `min_samples` better be tuned after finding a good value for `min_cluster_size`. - -PARAM = ( - {"min_cluster_size": 20, "min_samples": 5}, - {"min_cluster_size": 20, "min_samples": 3}, - {"min_cluster_size": 20, "min_samples": 25}, -) -fig, axes = plt.subplots(3, 1, figsize=(10, 12)) -for i, param in enumerate(PARAM): - hdb = HDBSCAN(copy=True, **param).fit(X) - labels = hdb.labels_ - - plot(X, labels, hdb.probabilities_, param, ax=axes[i]) - -# %% -# `dbscan_clustering` -# ^^^^^^^^^^^^^^^^^^^ -# During `fit`, `HDBSCAN` builds a single-linkage tree which encodes the -# clustering of all points across all values of :class:`~cluster.DBSCAN`'s -# `eps` parameter. -# We can thus plot and evaluate these clusterings efficiently without fully -# recomputing intermediate values such as core-distances, mutual-reachability, -# and the minimum spanning tree. All we need to do is specify the `cut_distance` -# (equivalent to `eps`) we want to cluster with. 
- -PARAM = ( - {"cut_distance": 0.1}, - {"cut_distance": 0.5}, - {"cut_distance": 1.0}, -) -hdb = HDBSCAN(copy=True) -hdb.fit(X) -fig, axes = plt.subplots(len(PARAM), 1, figsize=(10, 12)) -for i, param in enumerate(PARAM): - labels = hdb.dbscan_clustering(**param) - - plot(X, labels, hdb.probabilities_, param, ax=axes[i]) diff --git a/examples/cluster/plot_optics.py b/examples/cluster/plot_optics.py deleted file mode 100644 index 26218302542d9..0000000000000 --- a/examples/cluster/plot_optics.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -=================================== -Demo of OPTICS clustering algorithm -=================================== - -.. currentmodule:: sklearn - -Finds core samples of high density and expands clusters from them. -This example uses data that is generated so that the clusters have -different densities. - -The :class:`~cluster.OPTICS` is first used with its Xi cluster detection -method, and then setting specific thresholds on the reachability, which -corresponds to :class:`~cluster.DBSCAN`. We can see that the different -clusters of OPTICS's Xi method can be recovered with different choices of -thresholds in DBSCAN. - -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.cluster import OPTICS, cluster_optics_dbscan - -# Generate sample data - -np.random.seed(0) -n_points_per_cluster = 250 - -C1 = [-5, -2] + 0.8 * np.random.randn(n_points_per_cluster, 2) -C2 = [4, -1] + 0.1 * np.random.randn(n_points_per_cluster, 2) -C3 = [1, -2] + 0.2 * np.random.randn(n_points_per_cluster, 2) -C4 = [-2, 3] + 0.3 * np.random.randn(n_points_per_cluster, 2) -C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2) -C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2) -X = np.vstack((C1, C2, C3, C4, C5, C6)) - -clust = OPTICS(min_samples=50, xi=0.05, min_cluster_size=0.05) - -# Run the fit -clust.fit(X) - -labels_050 = cluster_optics_dbscan( - reachability=clust.reachability_, - core_distances=clust.core_distances_, - ordering=clust.ordering_, - eps=0.5, -) -labels_200 = cluster_optics_dbscan( - reachability=clust.reachability_, - core_distances=clust.core_distances_, - ordering=clust.ordering_, - eps=2, -) - -space = np.arange(len(X)) -reachability = clust.reachability_[clust.ordering_] -labels = clust.labels_[clust.ordering_] - -plt.figure(figsize=(10, 7)) -G = gridspec.GridSpec(2, 3) -ax1 = plt.subplot(G[0, :]) -ax2 = plt.subplot(G[1, 0]) -ax3 = plt.subplot(G[1, 1]) -ax4 = plt.subplot(G[1, 2]) - -# Reachability plot -colors = ["g.", "r.", "b.", "y.", "c."] -for klass, color in enumerate(colors): - Xk = space[labels == klass] - Rk = reachability[labels == klass] - ax1.plot(Xk, Rk, color, alpha=0.3) -ax1.plot(space[labels == -1], reachability[labels == -1], "k.", alpha=0.3) -ax1.plot(space, np.full_like(space, 2.0, dtype=float), "k-", alpha=0.5) -ax1.plot(space, np.full_like(space, 0.5, dtype=float), "k-.", alpha=0.5) -ax1.set_ylabel("Reachability (epsilon distance)") -ax1.set_title("Reachability Plot") - -# OPTICS -colors = ["g.", "r.", "b.", "y.", "c."] -for klass, color in enumerate(colors): - Xk = X[clust.labels_ == klass] - ax2.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3) -ax2.plot(X[clust.labels_ == -1, 0], X[clust.labels_ == -1, 1], "k+", alpha=0.1) -ax2.set_title("Automatic Clustering\nOPTICS") - -# DBSCAN at 0.5 -colors = ["g.", "r.", "b.", "c."] -for klass, color in enumerate(colors): - Xk = X[labels_050 == klass] - 
ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3) -ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], "k+", alpha=0.1) -ax3.set_title("Clustering at 0.5 epsilon cut\nDBSCAN") - -# DBSCAN at 2. -colors = ["g.", "m.", "y.", "c."] -for klass, color in enumerate(colors): - Xk = X[labels_200 == klass] - ax4.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3) -ax4.plot(X[labels_200 == -1, 0], X[labels_200 == -1, 1], "k+", alpha=0.1) -ax4.set_title("Clustering at 2.0 epsilon cut\nDBSCAN") - -plt.tight_layout() -plt.show()