diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 9b10912cca970..d9cf6ce112419 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -6,6 +6,7 @@ Authors: Shane Grigsby Amy X. Zhang + Erich Schubert License: BSD 3 clause """ @@ -317,6 +318,10 @@ class OPTICS(BaseEstimator, ClusterMixin): Distance at which each sample becomes a core point. Points which will never be core have a distance of inf. + predecessor_ : array, shape (n_samples,) + Point that a sample was reached from. + Seed points have a predecessor of -1. + See also -------- @@ -329,6 +334,10 @@ class OPTICS(BaseEstimator, ClusterMixin): Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. "OPTICS: ordering points to identify the clustering structure." ACM SIGMOD Record 28, no. 2 (1999): 49-60. + + Schubert, Erich, Michael Gertz. + "Improving the Cluster Structure Extracted from OPTICS Plots." Proc. of + the Conference "Lernen, Wissen, Daten, Analysen" (LWDA) (2018): 318-329. """ def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean', @@ -396,6 +405,8 @@ def fit(self, X, y=None): # Start all points as 'unprocessed' ## self.reachability_ = np.empty(n_samples) self.reachability_.fill(np.inf) + self.predecessor_ = np.empty(n_samples, dtype=int) + self.predecessor_.fill(-1) # Start all points as noise ## self.labels_ = np.full(n_samples, -1, dtype=int) @@ -499,8 +510,9 @@ def _set_reach_dist(self, point_index, processed, X, nbrs): self.metric, n_jobs=None).ravel() rdists = np.maximum(dists, self.core_distances_[point_index]) - new_reach = np.minimum(np.take(self.reachability_, unproc), rdists) - self.reachability_[unproc] = new_reach + improved = np.where(rdists < np.take(self.reachability_, unproc)) + self.reachability_[unproc[improved]] = rdists[improved] + self.predecessor_[unproc[improved]] = point_index # Define return order based on reachability distance return (unproc[quick_scan(np.take(self.reachability_, unproc),