From bda0c10f063c6d60e737072b2a7f0187cf7669ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= <tom.dupre-la-tour@m4x.org>
Date: Tue, 18 Sep 2018 11:24:11 +0200
Subject: [PATCH 1/5] Move core distances computation into a helper

---
 sklearn/cluster/optics_.py | 43 +++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index 19b6a79f45994..785e45578df7e 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -14,6 +14,7 @@
 import numpy as np
 
 from ..utils import check_array
+from ..utils import gen_batches, get_chunk_n_rows
 from ..utils.validation import check_is_fitted
 from ..neighbors import NearestNeighbors
 from ..base import BaseEstimator, ClusterMixin
@@ -395,8 +396,6 @@ def fit(self, X, y=None):
         # Start all points as 'unprocessed' ##
         self.reachability_ = np.empty(n_samples)
         self.reachability_.fill(np.inf)
-        self.core_distances_ = np.empty(n_samples)
-        self.core_distances_.fill(np.nan)
         # Start all points as noise ##
         self.labels_ = np.full(n_samples, -1, dtype=int)
 
@@ -407,9 +406,7 @@ def fit(self, X, y=None):
                                 n_jobs=self.n_jobs)
 
         nbrs.fit(X)
-        self.core_distances_[:] = nbrs.kneighbors(X,
-                                                  self.min_samples)[0][:, -1]
-
+        self.core_distances_ = self._calculate_core_distances_(X, nbrs)
         self.ordering_ = self._calculate_optics_order(X, nbrs)
 
         indices_, self.labels_ = _extract_optics(self.ordering_,
@@ -425,6 +422,42 @@ def fit(self, X, y=None):
 
     # OPTICS helper functions
 
+    def _calculate_core_distances_(self, X, nbrs, working_memory=None):
+        """Compute the k-th nearest neighbor of each sample
+
+        Equivalent to nbrs.kneighbors(X, self.min_samples)[0][:, -1]
+        but with more memory efficiency.
+
+        Parameters
+        ----------
+        X : array, shape (n_samples, n_features)
+            The data.
+        nbrs : NearestNeighbors instance
+            The fitted nearest neeighbors estimator
+        working_memory : int, optional
+            The sought maximum memory for temporary distance matrix chunks.
+            When None (default), the value of
+            ``sklearn.get_config()['working_memory']`` is used.
+
+        Returns
+        -------
+        core_distances : array, shape (n_samples,)
+            Distance at which each sample becomes a core point.
+            Computed for every sample; no max_eps cutoff is applied here.
+        """
+        n_samples = len(X)
+        core_distances = np.empty(n_samples)
+        core_distances.fill(np.nan)
+
+        chunk_n_rows = get_chunk_n_rows(row_bytes=8 * self.min_samples,
+                                        max_n_rows=n_samples,
+                                        working_memory=working_memory)
+        slices = gen_batches(n_samples, chunk_n_rows)
+        for sl in slices:
+            core_distances[sl] = nbrs.kneighbors(
+                X[sl], self.min_samples)[0][:, -1]
+        return core_distances
+
     def _calculate_optics_order(self, X, nbrs):
         # Main OPTICS loop. Not parallelizable. The order that entries are
         # written to the 'ordering_' list is important!

From fdb527284861b85d447545da2fadeac5571af2e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= <tom.dupre-la-tour@m4x.org>
Date: Tue, 18 Sep 2018 11:43:05 +0200
Subject: [PATCH 2/5] typo

---
 sklearn/cluster/optics_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index 785e45578df7e..840f0f7959be2 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -433,7 +433,7 @@ def _calculate_core_distances_(self, X, nbrs, working_memory=None):
         X : array, shape (n_samples, n_features)
             The data.
         nbrs : NearestNeighbors instance
-            The fitted nearest neeighbors estimator
+            The fitted nearest neighbors estimator.
         working_memory : int, optional
             The sought maximum memory for temporary distance matrix chunks.
             When None (default), the value of

From 7cb65a4ed2cc0dfd53d9dedda39c15cc8311f72c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= <tom.dupre-la-tour@m4x.org>
Date: Tue, 18 Sep 2018 14:59:54 +0200
Subject: [PATCH 3/5] renaming

---
 sklearn/cluster/optics_.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index 840f0f7959be2..e78935587e90f 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -406,7 +406,7 @@ def fit(self, X, y=None):
                                 n_jobs=self.n_jobs)
 
         nbrs.fit(X)
-        self.core_distances_ = self._calculate_core_distances_(X, nbrs)
+        self.core_distances_ = self._compute_core_distances_(X, nbrs)
         self.ordering_ = self._calculate_optics_order(X, nbrs)
 
         indices_, self.labels_ = _extract_optics(self.ordering_,
@@ -422,7 +422,7 @@ def fit(self, X, y=None):
 
     # OPTICS helper functions
 
-    def _calculate_core_distances_(self, X, nbrs, working_memory=None):
+    def _compute_core_distances_(self, X, neighbors, working_memory=None):
         """Compute the k-th nearest neighbor of each sample
 
         Equivalent to nbrs.kneighbors(X, self.min_samples)[0][:, -1]
@@ -432,7 +432,7 @@ def _calculate_core_distances_(self, X, nbrs, working_memory=None):
         ----------
         X : array, shape (n_samples, n_features)
             The data.
-        nbrs : NearestNeighbors instance
+        neighbors : NearestNeighbors instance
             The fitted nearest neighbors estimator.
         working_memory : int, optional
             The sought maximum memory for temporary distance matrix chunks.
@@ -454,7 +454,7 @@ def _calculate_core_distances_(self, X, nbrs, working_memory=None):
                                         working_memory=working_memory)
         slices = gen_batches(n_samples, chunk_n_rows)
         for sl in slices:
-            core_distances[sl] = nbrs.kneighbors(
+            core_distances[sl] = neighbors.kneighbors(
                 X[sl], self.min_samples)[0][:, -1]
         return core_distances
 

From 13a42bdbe41de428707801564d699882230a7c6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= <tom.dupre-la-tour@m4x.org>
Date: Tue, 18 Sep 2018 15:01:21 +0200
Subject: [PATCH 4/5] FIX memory consumption by sample

---
 sklearn/cluster/optics_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index e78935587e90f..7f5d3cf3f5e81 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -449,7 +449,7 @@ def _compute_core_distances_(self, X, neighbors, working_memory=None):
         core_distances = np.empty(n_samples)
         core_distances.fill(np.nan)
 
-        chunk_n_rows = get_chunk_n_rows(row_bytes=8 * self.min_samples,
+        chunk_n_rows = get_chunk_n_rows(row_bytes=16 * self.min_samples,
                                         max_n_rows=n_samples,
                                         working_memory=working_memory)
         slices = gen_batches(n_samples, chunk_n_rows)

From 40ecd64394558fe3cf7971982d4cf7e773e34c1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= <tom.dupre-la-tour@m4x.org>
Date: Tue, 18 Sep 2018 19:26:23 +0200
Subject: [PATCH 5/5] DOC minor

---
 sklearn/cluster/optics_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index 7f5d3cf3f5e81..9b10912cca970 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -425,7 +425,7 @@ def fit(self, X, y=None):
     def _compute_core_distances_(self, X, neighbors, working_memory=None):
         """Compute the k-th nearest neighbor of each sample
 
-        Equivalent to nbrs.kneighbors(X, self.min_samples)[0][:, -1]
+        Equivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]
         but with more memory efficiency.
 
         Parameters