From 9fb7312b9a64360c4df73c14914d0a89f339004c Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Wed, 26 Jun 2019 11:55:37 +0200 Subject: [PATCH 01/11] Use resample to compute the small training set in HistGBT --- .../gradient_boosting.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 5054c58ffc49f..4a594bc04fc9e 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -6,7 +6,7 @@ import numpy as np from timeit import default_timer as time from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin -from sklearn.utils import check_X_y, check_random_state, check_array +from sklearn.utils import check_X_y, check_random_state, check_array, resample from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets from sklearn.metrics import check_scoring @@ -386,15 +386,16 @@ def _get_small_trainset(self, X_binned_train, y_train, seed): with scorers. """ subsample_size = 10000 - rng = check_random_state(seed) - indices = np.arange(X_binned_train.shape[0]) if X_binned_train.shape[0] > subsample_size: - # TODO: not critical but stratify using resample() - indices = rng.choice(indices, subsample_size, replace=False) - X_binned_small_train = X_binned_train[indices] - y_small_train = y_train[indices] - X_binned_small_train = np.ascontiguousarray(X_binned_small_train) - return X_binned_small_train, y_small_train + indices = np.arange(X_binned_train.shape[0]) + indices = resample(indices, n_samples=subsample_size, + replace=False, random_state=seed) + X_binned_small_train = X_binned_train[indices] + y_small_train = y_train[indices] + X_binned_small_train = np.ascontiguousarray(X_binned_small_train) + return X_binned_small_train, y_small_train + else: + return X_binned_train, y_train def _check_early_stopping_scorer(self, X_binned_small_train, y_small_train, X_binned_val, y_val): From e0c7332a8c56163d3164697783dcd87025011890 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 12:09:09 +0200 Subject: [PATCH 02/11] Stratify the resampling --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 4a594bc04fc9e..33c128c66ec4c 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -5,7 +5,8 @@ import numpy as np from timeit import default_timer as time -from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin +from sklearn.base import (BaseEstimator, RegressorMixin, ClassifierMixin, + is_classifier) from sklearn.utils import check_X_y, check_random_state, check_array, resample from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets @@ -388,8 +389,10 @@ def _get_small_trainset(self, X_binned_train, y_train, seed): subsample_size = 10000 if X_binned_train.shape[0] > subsample_size: indices = np.arange(X_binned_train.shape[0]) + stratify = y_train if is_classifier(self) else None indices = resample(indices, n_samples=subsample_size, - replace=False, random_state=seed) + replace=False, 
random_state=seed, + stratify=stratify) X_binned_small_train = X_binned_train[indices] y_small_train = y_train[indices] X_binned_small_train = np.ascontiguousarray(X_binned_small_train) From 29f5b506976e071e544b0e7a719c305d1087bbd7 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 13:57:22 +0200 Subject: [PATCH 03/11] Up-to-date branch --- doc/whats_new/v0.22.rst | 4 + examples/compose/plot_column_transformer.py | 2 +- .../plot_ica_blind_source_separation.py | 2 +- examples/manifold/plot_lle_digits.py | 11 +- examples/manifold/plot_mds.py | 6 +- sklearn/_build_utils/__init__.py | 11 + sklearn/cluster/tests/test_optics.py | 2 +- sklearn/externals/_lobpcg.py | 661 ++++++++++++++++++ .../tests/test_variance_threshold.py | 4 +- sklearn/impute/tests/test_impute.py | 23 +- sklearn/linear_model/tests/test_ridge.py | 5 +- sklearn/manifold/spectral_embedding_.py | 3 +- sklearn/model_selection/_search.py | 15 + .../tests/test_discretization.py | 10 +- sklearn/utils/estimator_checks.py | 10 +- sklearn/utils/fixes.py | 6 + 16 files changed, 743 insertions(+), 32 deletions(-) create mode 100644 sklearn/externals/_lobpcg.py diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 4e6e4b0dd8258..1089284a9f6a9 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -129,6 +129,10 @@ Miscellaneous using a non-fitted estimators are now more uniform. :pr:`13013` by :user:`Agamemnon Krasoulis `. +- |Fix| Port `lobpcg` from SciPy which implement some bug fixes but only + available in 1.3+. + :pr:`14195` by :user:`Guillaume Lemaitre `. + Changes to estimator checks --------------------------- diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py index 02599a12396d6..181e3e9127b56 100644 --- a/examples/compose/plot_column_transformer.py +++ b/examples/compose/plot_column_transformer.py @@ -116,7 +116,7 @@ def transform(self, posts): )), # Use a SVC classifier on the combined features - ('svc', LinearSVC()), + ('svc', LinearSVC(dual=False)), ], verbose=True) # limit the list of categories to make running this example faster. 
diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py index fb7689064dd06..b405b1770cd34 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -69,5 +69,5 @@ for sig, color in zip(model.T, colors): plt.plot(sig, color=color) -plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.46) +plt.tight_layout() plt.show() diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 4a3002a05d0dd..e2b0953e7e747 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -28,14 +28,13 @@ # Gael Varoquaux # License: BSD 3 clause (C) INRIA 2011 -print(__doc__) from time import time - import numpy as np import matplotlib.pyplot as plt from matplotlib import offsetbox from sklearn import (manifold, datasets, decomposition, ensemble, discriminant_analysis, random_projection, neighbors) +print(__doc__) digits = datasets.load_digits(n_class=6) X = digits.data @@ -99,7 +98,7 @@ def plot_embedding(X, title=None): plot_embedding(X_projected, "Random Projection of the digits") -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Projection on to the first 2 principal components print("Computing PCA projection") @@ -116,7 +115,8 @@ def plot_embedding(X, title=None): X2 = X.copy() X2.flat[::X.shape[1] + 1] += 0.01 # Make X invertible t0 = time() -X_lda = discriminant_analysis.LinearDiscriminantAnalysis(n_components=2).fit_transform(X2, y) +X_lda = discriminant_analysis.LinearDiscriminantAnalysis(n_components=2 + ).fit_transform(X2, y) plot_embedding(X_lda, "Linear Discriminant projection of the digits (time %.2fs)" % (time() - t0)) @@ -235,7 +235,8 @@ def plot_embedding(X, title=None): # ---------------------------------------------------------------------- # NCA projection of the digits dataset print("Computing NCA projection") -nca = neighbors.NeighborhoodComponentsAnalysis(n_components=2, random_state=0) +nca = neighbors.NeighborhoodComponentsAnalysis(init='random', + n_components=2, random_state=0) t0 = time() X_nca = nca.fit_transform(X, y) diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py index 6398e2f7a6242..555d9b5e92bc3 100644 --- a/examples/manifold/plot_mds.py +++ b/examples/manifold/plot_mds.py @@ -22,6 +22,7 @@ from sklearn.metrics import euclidean_distances from sklearn.decomposition import PCA +EPSILON = np.finfo(np.float32).eps n_samples = 20 seed = np.random.RandomState(seed=3) X_true = seed.randint(0, 20, 2 * n_samples).astype(np.float) @@ -68,9 +69,8 @@ plt.scatter(npos[:, 0], npos[:, 1], color='darkorange', s=s, lw=0, label='NMDS') plt.legend(scatterpoints=1, loc='best', shadow=False) -similarities = similarities.max() / similarities * 100 -similarities[np.isinf(similarities)] = 0 - +similarities = similarities.max() / (similarities + EPSILON) * 100 +np.fill_diagonal(similarities, 0) # Plot the edges start_idx, end_idx = np.where(pos) # a sequence of (*line0*, *line1*, *line2*), where:: diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index 2d872dd378998..5353d3b297965 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -8,6 +8,7 @@ import os from distutils.version import LooseVersion +import contextlib from numpy.distutils.system_info import get_info @@ -86,7 +87,17 @@ def 
maybe_cythonize_extensions(top_path, config): exc.args += (message,) raise + n_jobs = 1 + with contextlib.suppress(ImportError): + import joblib + if LooseVersion(joblib.__version__) > LooseVersion("0.13.0"): + # earlier joblib versions don't account for CPU affinity + # constraints, and may over-estimate the number of available + # CPU particularly in CI (cf loky#114) + n_jobs = joblib.effective_n_jobs() + config.ext_modules = cythonize( config.ext_modules, + nthreads=n_jobs, compile_time_env={'SKLEARN_OPENMP_SUPPORTED': with_openmp}, compiler_directives={'language_level': 3}) diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 1e3d99746c9e9..1028d7174bb4e 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -109,7 +109,7 @@ def test_extract_xi(): clust = OPTICS(min_samples=3, min_cluster_size=3, max_eps=20, cluster_method='xi', - xi=0.1).fit(X) + xi=0.3).fit(X) # this may fail if the predecessor correction is not at work! assert_array_equal(clust.labels_, expected_labels) diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py new file mode 100644 index 0000000000000..30492c97c182b --- /dev/null +++ b/sklearn/externals/_lobpcg.py @@ -0,0 +1,661 @@ +""" +scikit-learn copy of scipy/sparse/linalg/eigen/lobpcg/lobpcg.py v1.3.0 +to be deleted after scipy 1.3.0 becomes a dependency in scikit-lean +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG). + +References +---------- +.. [1] A. V. Knyazev (2001), + Toward the Optimal Preconditioned Eigensolver: Locally Optimal + Block Preconditioned Conjugate Gradient Method. + SIAM Journal on Scientific Computing 23, no. 2, + pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124 + +.. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov (2007), + Block Locally Optimal Preconditioned Eigenvalue Xolvers (BLOPEX) + in hypre and PETSc. https://arxiv.org/abs/0705.2626 + +.. [3] A. V. Knyazev's C and MATLAB implementations: + https://bitbucket.org/joseroman/blopex +""" + +from __future__ import division, print_function, absolute_import +import warnings +import numpy as np +from scipy.linalg import (inv, eigh, cho_factor, cho_solve, cholesky, + LinAlgError) +from scipy.sparse.linalg import aslinearoperator + +__all__ = ['lobpcg'] + + +def bmat(*args, **kwargs): + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + 'ignore', '.*the matrix subclass is not the recommended way.*') + return np.bmat(*args, **kwargs) + + +def _save(ar, fileName): + # Used only when verbosity level > 10. + np.savetxt(fileName, ar) + + +def _report_nonhermitian(M, a, b, name): + """ + Report if `M` is not a hermitian matrix given the tolerances `a`, `b`. + """ + from scipy.linalg import norm + + md = M - M.T.conj() + + nmd = norm(md, 1) + tol = np.spacing(max(10**a, (10**b)*norm(M, 1))) + if nmd > tol: + print('matrix %s is not sufficiently Hermitian for a=%d, b=%d:' + % (name, a, b)) + print('condition: %.e < %e' % (nmd, tol)) + + +def _as2d(ar): + """ + If the input array is 2D return it, if it is 1D, append a dimension, + making it a column vector. + """ + if ar.ndim == 2: + return ar + else: # Assume 1! 
+ aux = np.array(ar, copy=False) + aux.shape = (ar.shape[0], 1) + return aux + + +def _makeOperator(operatorInput, expectedShape): + """Takes a dense numpy array or a sparse matrix or + a function and makes an operator performing matrix * blockvector + products.""" + if operatorInput is None: + return None + else: + operator = aslinearoperator(operatorInput) + + if operator.shape != expectedShape: + raise ValueError('operator has invalid shape') + + return operator + + +def _applyConstraints(blockVectorV, factYBY, blockVectorBY, blockVectorY): + """Changes blockVectorV in place.""" + gramYBV = np.dot(blockVectorBY.T.conj(), blockVectorV) + tmp = cho_solve(factYBY, gramYBV) + blockVectorV -= np.dot(blockVectorY, tmp) + + +def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False): + if blockVectorBV is None: + if B is not None: + blockVectorBV = B(blockVectorV) + else: + blockVectorBV = blockVectorV # Shared data!!! + gramVBV = np.dot(blockVectorV.T.conj(), blockVectorBV) + gramVBV = cholesky(gramVBV) + gramVBV = inv(gramVBV, overwrite_a=True) + # gramVBV is now R^{-1}. + blockVectorV = np.dot(blockVectorV, gramVBV) + if B is not None: + blockVectorBV = np.dot(blockVectorBV, gramVBV) + else: + blockVectorBV = None + + if retInvR: + return blockVectorV, blockVectorBV, gramVBV + else: + return blockVectorV, blockVectorBV + + +def _get_indx(_lambda, num, largest): + """Get `num` indices into `_lambda` depending on `largest` option.""" + ii = np.argsort(_lambda) + if largest: + ii = ii[:-num-1:-1] + else: + ii = ii[:num] + + return ii + + +def lobpcg(A, X, + B=None, M=None, Y=None, + tol=None, maxiter=20, + largest=True, verbosityLevel=0, + retLambdaHistory=False, retResidualNormsHistory=False): + """Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG) + + LOBPCG is a preconditioned eigensolver for large symmetric positive + definite (SPD) generalized eigenproblems. + + Parameters + ---------- + A : {sparse matrix, dense matrix, LinearOperator} + The symmetric linear operator of the problem, usually a + sparse matrix. Often called the "stiffness matrix". + X : array_like + Initial approximation to the k eigenvectors. If A has + shape=(n,n) then X should have shape shape=(n,k). + B : {dense matrix, sparse matrix, LinearOperator}, optional + the right hand side operator in a generalized eigenproblem. + by default, B = Identity + often called the "mass matrix" + M : {dense matrix, sparse matrix, LinearOperator}, optional + preconditioner to A; by default M = Identity + M should approximate the inverse of A + Y : array_like, optional + n-by-sizeY matrix of constraints, sizeY < n + The iterations will be performed in the B-orthogonal complement + of the column-space of Y. Y must be full rank. + tol : scalar, optional + Solver tolerance (stopping criterion) + by default: tol=n*sqrt(eps) + maxiter : integer, optional + maximum number of iterations + by default: maxiter=min(n,20) + largest : bool, optional + when True, solve for the largest eigenvalues, otherwise the smallest + verbosityLevel : integer, optional + controls solver output. default: verbosityLevel = 0. + retLambdaHistory : boolean, optional + whether to return eigenvalue history + retResidualNormsHistory : boolean, optional + whether to return history of residual norms + + Returns + ------- + w : array + Array of k eigenvalues + v : array + An array of k eigenvectors. V has the same shape as X. + lambdas : list of arrays, optional + The eigenvalue history, if `retLambdaHistory` is True. 
+ rnorms : list of arrays, optional + The history of residual norms, if `retResidualNormsHistory` is True. + + Examples + -------- + + Solve A x = lambda B x with constraints and preconditioning. + + >>> from scipy.sparse import spdiags, issparse + >>> from scipy.sparse.linalg import lobpcg, LinearOperator + >>> n = 100 + >>> vals = [np.arange(n, dtype=np.float64) + 1] + >>> A = spdiags(vals, 0, n, n) + >>> A.toarray() + array([[ 1., 0., 0., ..., 0., 0., 0.], + [ 0., 2., 0., ..., 0., 0., 0.], + [ 0., 0., 3., ..., 0., 0., 0.], + ..., + [ 0., 0., 0., ..., 98., 0., 0.], + [ 0., 0., 0., ..., 0., 99., 0.], + [ 0., 0., 0., ..., 0., 0., 100.]]) + + Constraints. + + >>> Y = np.eye(n, 3) + + Initial guess for eigenvectors, should have linearly independent + columns. Column dimension = number of requested eigenvalues. + + >>> X = np.random.rand(n, 3) + + Preconditioner -- inverse of A (as an abstract linear operator). + + >>> invA = spdiags([1./vals[0]], 0, n, n) + >>> def precond( x ): + ... return invA * x + >>> M = LinearOperator(matvec=precond, shape=(n, n), dtype=float) + + Here, ``invA`` could of course have been used directly as a preconditioner. + Let us then solve the problem: + + >>> eigs, vecs = lobpcg(A, X, Y=Y, M=M, largest=False) + >>> eigs + array([4., 5., 6.]) + + Note that the vectors passed in Y are the eigenvectors of the 3 smallest + eigenvalues. The results returned are orthogonal to those. + + Notes + ----- + If both retLambdaHistory and retResidualNormsHistory are True, + the return tuple has the following format + (lambda, V, lambda history, residual norms history). + + In the following ``n`` denotes the matrix size and ``m`` the number + of required eigenvalues (smallest or largest). + + The LOBPCG code internally solves eigenproblems of the size 3``m`` on every + iteration by calling the "standard" dense eigensolver, so if ``m`` is not + small enough compared to ``n``, it does not make sense to call the LOBPCG + code, but rather one should use the "standard" eigensolver, + e.g. numpy or scipy function in this case. + If one calls the LOBPCG algorithm for 5``m``>``n``, + it will most likely break internally, so the code tries to call + the standard function instead. + + It is not that n should be large for the LOBPCG to work, but rather the + ratio ``n``/``m`` should be large. It you call LOBPCG with ``m``=1 + and ``n``=10, it works though ``n`` is small. The method is intended + for extremely large ``n``/``m``, see e.g., reference [28] in + https://arxiv.org/abs/0705.2626 + + The convergence speed depends basically on two factors: + + 1. How well relatively separated the seeking eigenvalues are from the rest + of the eigenvalues. One can try to vary ``m`` to make this better. + + 2. How well conditioned the problem is. This can be changed by using proper + preconditioning. For example, a rod vibration test problem (under tests + directory) is ill-conditioned for large ``n``, so convergence will be + slow, unless efficient preconditioning is used. For this specific + problem, a good simple preconditioner function would be a linear solve + for A, which is easy to code since A is tridiagonal. + + *Acknowledgements* + + lobpcg.py code was written by Robert Cimrman. + Many thanks belong to Andrew Knyazev, the author of the algorithm, + for lots of advice and support. + + References + ---------- + .. [1] A. V. Knyazev (2001), + Toward the Optimal Preconditioned Eigensolver: Locally Optimal + Block Preconditioned Conjugate Gradient Method. 
+ SIAM Journal on Scientific Computing 23, no. 2, + pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124 + + .. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov + (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers + (BLOPEX) in hypre and PETSc. https://arxiv.org/abs/0705.2626 + + .. [3] A. V. Knyazev's C and MATLAB implementations: + https://bitbucket.org/joseroman/blopex + """ + blockVectorX = X + blockVectorY = Y + residualTolerance = tol + maxIterations = maxiter + + if blockVectorY is not None: + sizeY = blockVectorY.shape[1] + else: + sizeY = 0 + + # Block size. + if len(blockVectorX.shape) != 2: + raise ValueError('expected rank-2 array for argument X') + + n, sizeX = blockVectorX.shape + + if verbosityLevel: + aux = "Solving " + if B is None: + aux += "standard" + else: + aux += "generalized" + aux += " eigenvalue problem with" + if M is None: + aux += "out" + aux += " preconditioning\n\n" + aux += "matrix size %d\n" % n + aux += "block size %d\n\n" % sizeX + if blockVectorY is None: + aux += "No constraints\n\n" + else: + if sizeY > 1: + aux += "%d constraints\n\n" % sizeY + else: + aux += "%d constraint\n\n" % sizeY + print(aux) + + A = _makeOperator(A, (n, n)) + B = _makeOperator(B, (n, n)) + M = _makeOperator(M, (n, n)) + + if (n - sizeY) < (5 * sizeX): + # warn('The problem size is small compared to the block size.' \ + # ' Using dense eigensolver instead of LOBPCG.') + + sizeX = min(sizeX, n) + + if blockVectorY is not None: + raise NotImplementedError('The dense eigensolver ' + 'does not support constraints.') + + # Define the closed range of indices of eigenvalues to return. + if largest: + eigvals = (n - sizeX, n-1) + else: + eigvals = (0, sizeX-1) + + A_dense = A(np.eye(n, dtype=A.dtype)) + B_dense = None if B is None else B(np.eye(n, dtype=B.dtype)) + + vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals, + check_finite=False) + if largest: + # Reverse order to be compatible with eigs() in 'LM' mode. + vals = vals[::-1] + vecs = vecs[:, ::-1] + + return vals, vecs + + if (residualTolerance is None) or (residualTolerance <= 0.0): + residualTolerance = np.sqrt(1e-15) * n + + # Apply constraints to X. + if blockVectorY is not None: + + if B is not None: + blockVectorBY = B(blockVectorY) + else: + blockVectorBY = blockVectorY + + # gramYBY is a dense array. + gramYBY = np.dot(blockVectorY.T.conj(), blockVectorBY) + try: + # gramYBY is a Cholesky factor from now on... + gramYBY = cho_factor(gramYBY) + except LinAlgError: + raise ValueError('cannot handle linearly dependent constraints') + + _applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY) + + ## + # B-orthonormalize X. + blockVectorX, blockVectorBX = _b_orthonormalize(B, blockVectorX) + + ## + # Compute the initial Ritz vectors: solve the eigenproblem. + blockVectorAX = A(blockVectorX) + gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX) + + _lambda, eigBlockVector = eigh(gramXAX, check_finite=False) + ii = _get_indx(_lambda, sizeX, largest) + _lambda = _lambda[ii] + + eigBlockVector = np.asarray(eigBlockVector[:, ii]) + blockVectorX = np.dot(blockVectorX, eigBlockVector) + blockVectorAX = np.dot(blockVectorAX, eigBlockVector) + if B is not None: + blockVectorBX = np.dot(blockVectorBX, eigBlockVector) + + ## + # Active index set. + activeMask = np.ones((sizeX,), dtype=bool) + + lambdaHistory = [_lambda] + residualNormsHistory = [] + + previousBlockSize = sizeX + ident = np.eye(sizeX, dtype=A.dtype) + ident0 = np.eye(sizeX, dtype=A.dtype) + + ## + # Main iteration loop. 
+ + blockVectorP = None # set during iteration + blockVectorAP = None + blockVectorBP = None + + iterationNumber = -1 + while iterationNumber < maxIterations: + iterationNumber += 1 + if verbosityLevel > 0: + print('iteration %d' % iterationNumber) + + if B is not None: + aux = blockVectorBX * _lambda[np.newaxis, :] + + else: + aux = blockVectorX * _lambda[np.newaxis, :] + + blockVectorR = blockVectorAX - aux + + aux = np.sum(blockVectorR.conjugate() * blockVectorR, 0) + residualNorms = np.sqrt(aux) + + residualNormsHistory.append(residualNorms) + + ii = np.where(residualNorms > residualTolerance, True, False) + activeMask = activeMask & ii + if verbosityLevel > 2: + print(activeMask) + + currentBlockSize = activeMask.sum() + if currentBlockSize != previousBlockSize: + previousBlockSize = currentBlockSize + ident = np.eye(currentBlockSize, dtype=A.dtype) + + if currentBlockSize == 0: + break + + if verbosityLevel > 0: + print('current block size:', currentBlockSize) + print('eigenvalue:', _lambda) + print('residual norms:', residualNorms) + if verbosityLevel > 10: + print(eigBlockVector) + + activeBlockVectorR = _as2d(blockVectorR[:, activeMask]) + + if iterationNumber > 0: + activeBlockVectorP = _as2d(blockVectorP[:, activeMask]) + activeBlockVectorAP = _as2d(blockVectorAP[:, activeMask]) + if B is not None: + activeBlockVectorBP = _as2d(blockVectorBP[:, activeMask]) + + if M is not None: + # Apply preconditioner T to the active residuals. + activeBlockVectorR = M(activeBlockVectorR) + + ## + # Apply constraints to the preconditioned residuals. + if blockVectorY is not None: + _applyConstraints(activeBlockVectorR, + gramYBY, blockVectorBY, blockVectorY) + + ## + # B-orthonormalize the preconditioned residuals. + + aux = _b_orthonormalize(B, activeBlockVectorR) + activeBlockVectorR, activeBlockVectorBR = aux + + activeBlockVectorAR = A(activeBlockVectorR) + + if iterationNumber > 0: + if B is not None: + aux = _b_orthonormalize(B, activeBlockVectorP, + activeBlockVectorBP, retInvR=True) + activeBlockVectorP, activeBlockVectorBP, invR = aux + activeBlockVectorAP = np.dot(activeBlockVectorAP, invR) + + else: + aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True) + activeBlockVectorP, _, invR = aux + activeBlockVectorAP = np.dot(activeBlockVectorAP, invR) + + ## + # Perform the Rayleigh Ritz Procedure: + # Compute symmetric Gram matrices: + + if B is not None: + xaw = np.dot(blockVectorX.T.conj(), activeBlockVectorAR) + waw = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR) + xbw = np.dot(blockVectorX.T.conj(), activeBlockVectorBR) + + if iterationNumber > 0: + xap = np.dot(blockVectorX.T.conj(), activeBlockVectorAP) + wap = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP) + pap = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP) + xbp = np.dot(blockVectorX.T.conj(), activeBlockVectorBP) + wbp = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP) + + gramA = bmat([[np.diag(_lambda), xaw, xap], + [xaw.T.conj(), waw, wap], + [xap.T.conj(), wap.T.conj(), pap]]) + + gramB = bmat([[ident0, xbw, xbp], + [xbw.T.conj(), ident, wbp], + [xbp.T.conj(), wbp.T.conj(), ident]]) + else: + gramA = bmat([[np.diag(_lambda), xaw], + [xaw.T.conj(), waw]]) + gramB = bmat([[ident0, xbw], + [xbw.T.conj(), ident]]) + + else: + xaw = np.dot(blockVectorX.T.conj(), activeBlockVectorAR) + waw = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR) + xbw = np.dot(blockVectorX.T.conj(), activeBlockVectorR) + + if iterationNumber > 0: + xap = np.dot(blockVectorX.T.conj(), 
activeBlockVectorAP) + wap = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP) + pap = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP) + xbp = np.dot(blockVectorX.T.conj(), activeBlockVectorP) + wbp = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorP) + + gramA = bmat([[np.diag(_lambda), xaw, xap], + [xaw.T.conj(), waw, wap], + [xap.T.conj(), wap.T.conj(), pap]]) + + gramB = bmat([[ident0, xbw, xbp], + [xbw.T.conj(), ident, wbp], + [xbp.T.conj(), wbp.T.conj(), ident]]) + else: + gramA = bmat([[np.diag(_lambda), xaw], + [xaw.T.conj(), waw]]) + gramB = bmat([[ident0, xbw], + [xbw.T.conj(), ident]]) + + if verbosityLevel > 0: + _report_nonhermitian(gramA, 3, -1, 'gramA') + _report_nonhermitian(gramB, 3, -1, 'gramB') + + if verbosityLevel > 10: + _save(gramA, 'gramA') + _save(gramB, 'gramB') + + # Solve the generalized eigenvalue problem. + _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) + ii = _get_indx(_lambda, sizeX, largest) + + if verbosityLevel > 10: + print(ii) + print(_lambda) + + _lambda = _lambda[ii] + eigBlockVector = eigBlockVector[:, ii] + + lambdaHistory.append(_lambda) + + if verbosityLevel > 10: + print('lambda:', _lambda) +# # Normalize eigenvectors! +# aux = np.sum( eigBlockVector.conjugate() * eigBlockVector, 0 ) +# eigVecNorms = np.sqrt( aux ) +# eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :] +# eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector ) + + if verbosityLevel > 10: + print(eigBlockVector) + + # Compute Ritz vectors. + if B is not None: + if iterationNumber > 0: + eigBlockVectorX = eigBlockVector[:sizeX] + eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:] + + pp = np.dot(activeBlockVectorR, eigBlockVectorR) + pp += np.dot(activeBlockVectorP, eigBlockVectorP) + + app = np.dot(activeBlockVectorAR, eigBlockVectorR) + app += np.dot(activeBlockVectorAP, eigBlockVectorP) + + bpp = np.dot(activeBlockVectorBR, eigBlockVectorR) + bpp += np.dot(activeBlockVectorBP, eigBlockVectorP) + else: + eigBlockVectorX = eigBlockVector[:sizeX] + eigBlockVectorR = eigBlockVector[sizeX:] + + pp = np.dot(activeBlockVectorR, eigBlockVectorR) + app = np.dot(activeBlockVectorAR, eigBlockVectorR) + bpp = np.dot(activeBlockVectorBR, eigBlockVectorR) + + if verbosityLevel > 10: + print(pp) + print(app) + print(bpp) + + blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp + blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app + blockVectorBX = np.dot(blockVectorBX, eigBlockVectorX) + bpp + + blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp + + else: + if iterationNumber > 0: + eigBlockVectorX = eigBlockVector[:sizeX] + eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:] + + pp = np.dot(activeBlockVectorR, eigBlockVectorR) + pp += np.dot(activeBlockVectorP, eigBlockVectorP) + + app = np.dot(activeBlockVectorAR, eigBlockVectorR) + app += np.dot(activeBlockVectorAP, eigBlockVectorP) + else: + eigBlockVectorX = eigBlockVector[:sizeX] + eigBlockVectorR = eigBlockVector[sizeX:] + + pp = np.dot(activeBlockVectorR, eigBlockVectorR) + app = np.dot(activeBlockVectorAR, eigBlockVectorR) + + if verbosityLevel > 10: + print(pp) + print(app) + + blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp + blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app + + blockVectorP, blockVectorAP = pp, app + + if B is not None: + aux = blockVectorBX * _lambda[np.newaxis, :] + + else: + aux 
= blockVectorX * _lambda[np.newaxis, :] + + blockVectorR = blockVectorAX - aux + + aux = np.sum(blockVectorR.conjugate() * blockVectorR, 0) + residualNorms = np.sqrt(aux) + + if verbosityLevel > 0: + print('final eigenvalue:', _lambda) + print('final residual norms:', residualNorms) + + if retLambdaHistory: + if retResidualNormsHistory: + return _lambda, blockVectorX, lambdaHistory, residualNormsHistory + else: + return _lambda, blockVectorX, lambdaHistory + else: + if retResidualNormsHistory: + return _lambda, blockVectorX, residualNormsHistory + else: + return _lambda, blockVectorX diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index fba4478a28e2f..53a90ace37a40 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -38,7 +38,9 @@ def test_zero_variance_floating_point_error(): # See #13691 data = [[-0.13725701]] * 10 - assert np.var(data) != 0 + if np.var(data) == 0: + pytest.skip('This test is not valid for this platform, as it relies ' + 'on numerical instabilities.') for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: msg = "No feature in X meets the variance threshold 0.00000" with pytest.raises(ValueError, match=msg): diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index 9562eb54adb3d..ebabc5c311f6b 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -65,21 +65,22 @@ def _check_statistics(X, X_true, assert_ae(X_trans, X_true, err_msg=err_msg.format(True)) -def test_imputation_shape(): +@pytest.mark.parametrize("strategy", + ['mean', 'median', 'most_frequent', "constant"]) +def test_imputation_shape(strategy): # Verify the shapes of the imputed matrix for different strategies. 
X = np.random.randn(10, 2) X[::2] = np.nan - for strategy in ['mean', 'median', 'most_frequent', "constant"]: - imputer = SimpleImputer(strategy=strategy) - X_imputed = imputer.fit_transform(sparse.csr_matrix(X)) - assert X_imputed.shape == (10, 2) - X_imputed = imputer.fit_transform(X) - assert X_imputed.shape == (10, 2) - - iterative_imputer = IterativeImputer(initial_strategy=strategy) - X_imputed = iterative_imputer.fit_transform(X) - assert X_imputed.shape == (10, 2) + imputer = SimpleImputer(strategy=strategy) + X_imputed = imputer.fit_transform(sparse.csr_matrix(X)) + assert X_imputed.shape == (10, 2) + X_imputed = imputer.fit_transform(X) + assert X_imputed.shape == (10, 2) + + iterative_imputer = IterativeImputer(initial_strategy=strategy) + X_imputed = iterative_imputer.fit_transform(X) + assert X_imputed.shape == (10, 2) @pytest.mark.parametrize("strategy", ["const", 101, None]) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index ddaf0f5e63d0f..cfc487c6ffe66 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1099,13 +1099,14 @@ def test_dtype_match(solver): X_32 = X_64.astype(np.float32) y_32 = y_64.astype(np.float32) + tol = 2 * np.finfo(np.float32).resolution # Check type consistency 32bits - ridge_32 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=1e-10,) + ridge_32 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol) ridge_32.fit(X_32, y_32) coef_32 = ridge_32.coef_ # Check type consistency 64 bits - ridge_64 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=1e-10,) + ridge_64 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol) ridge_64.fit(X_64, y_64) coef_64 = ridge_64.coef_ diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index a6d5af54f9bc4..42227db8a72ad 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -10,13 +10,14 @@ import numpy as np from scipy import sparse from scipy.linalg import eigh -from scipy.sparse.linalg import eigsh, lobpcg +from scipy.sparse.linalg import eigsh from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import laplacian as csgraph_laplacian from ..base import BaseEstimator from ..utils import check_random_state, check_array, check_symmetric from ..utils.extmath import _deterministic_vector_sign_flip +from ..utils.fixes import lobpcg from ..metrics.pairwise import rbf_kernel from ..neighbors import kneighbors_graph diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 7d2c60d34b02d..5b402c17ee86f 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -1411,6 +1411,21 @@ class RandomizedSearchCV(BaseSearchCV): A generator over parameter settings, constructed from param_distributions. + + Examples + -------- + >>> from sklearn.datasets import load_iris + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.model_selection import RandomizedSearchCV + >>> from scipy.stats import uniform + >>> iris = load_iris() + >>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200) + >>> distributions = dict(C=uniform(loc=0, scale=4), + ... 
penalty=['l2', 'l1']) + >>> clf = RandomizedSearchCV(logistic, distributions, random_state=0) + >>> search = clf.fit(iris.data, iris.target) + >>> search.best_params_ + {'C': 2..., 'penalty': 'l1'} """ _required_parameters = ["estimator", "param_distributions"] diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index 102b789eb093d..6dd0abdb99e9f 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -135,15 +135,15 @@ def test_transform_1d_behavior(): assert_raises(ValueError, est.transform, X) -def test_numeric_stability(): +@pytest.mark.parametrize('i', range(1, 9)) +def test_numeric_stability(i): X_init = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1) Xt_expected = np.array([0, 0, 1, 1, 1]).reshape(-1, 1) # Test up to discretizing nano units - for i in range(1, 9): - X = X_init / 10**i - Xt = KBinsDiscretizer(n_bins=2, encode='ordinal').fit_transform(X) - assert_array_equal(Xt_expected, Xt) + X = X_init / 10**i + Xt = KBinsDiscretizer(n_bins=2, encode='ordinal').fit_transform(X) + assert_array_equal(Xt_expected, Xt) def test_invalid_encode_option(): diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 159b89846cb39..0bec5c3911681 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2506,4 +2506,12 @@ def check_fit_idempotent(name, estimator_orig): for method in check_methods: if hasattr(estimator, method): new_result = getattr(estimator, method)(X_test) - assert_allclose_dense_sparse(result[method], new_result) + if np.issubdtype(new_result.dtype, np.floating): + tol = 2*np.finfo(new_result.dtype).eps + else: + tol = 2*np.finfo(np.float64).eps + assert_allclose_dense_sparse( + result[method], new_result, + atol=max(tol, 1e-9), rtol=max(tol, 1e-7), + err_msg="Idempotency check failed for method {}".format(method) + ) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 712b6826a2b75..2481eb39c9d0c 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -38,6 +38,12 @@ def _parse_version(version_string): except ImportError: from scipy.misc import comb, logsumexp # noqa +if sp_version >= (1, 3): + from scipy.sparse.linalg import lobpcg +else: + # Backport of lobpcg functionality from scipy 1.3.0, can be removed + # once support for sp_version < (1, 3) is dropped + from ..externals._lobpcg import lobpcg # noqa if sp_version >= (0, 19): def _argmax(arr_or_spmatrix, axis=None): From 828b662266a653eddc8c1be18420c48ba732ea49 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 14:02:10 +0200 Subject: [PATCH 04/11] Add resample in imports --- sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 011907df5ec39..3650867b752cc 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -7,7 +7,7 @@ from timeit import default_timer as time from ...base import (BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier) -from ...utils import check_X_y, check_random_state, check_array +from ...utils import check_X_y, check_random_state, check_array, resample from ...utils.validation import check_is_fitted from ...utils.multiclass import check_classification_targets 
from ...metrics import check_scoring From d5fd250cc743c0b73e7b520da9017f10b62d3787 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 16:14:53 +0200 Subject: [PATCH 05/11] Add tests for small trainset computation --- .../tests/test_gradient_boosting.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 86b3eeb239c3d..155c20edb5099 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -190,3 +190,29 @@ def test_zero_division_hessians(data): X, y = data gb = HistGradientBoostingClassifier(learning_rate=100, max_iter=10) gb.fit(X, y) + + +@pytest.mark.parametrize('GradientBoosting, data', [ + (HistGradientBoostingClassifier, + make_classification(n_samples=10001, random_state=0, n_features=2, + n_informative=2, n_redundant=0)), + (HistGradientBoostingRegressor, + make_regression(n_samples=10001, random_state=0, n_features=2, + n_informative=2))] +) +def test_small_trainset(GradientBoosting, data): + # Make sure that a small trainset has the expected length (10k samples) + X, y = data + gb = GradientBoosting(random_state=42) + X_small, y_small = gb._get_small_trainset(X, y, seed=42) + assert X_small.shape[0] == 10000 + assert y_small.shape[0] == 10000 + + +def test_stratification_small_trainset(): + # Make sure that the small trainset is stratified + X, y = make_classification(n_samples=20000, n_features=2, + n_informative=2, n_redundant=0) + gb = HistGradientBoostingClassifier(random_state=42) + X_small_train, y_small_train = gb._get_small_trainset(X, y, seed=42) + np.testing.assert_almost_equal(y.mean(), y_small_train.mean(), decimal=3) From 058383d6676fe7567d86b67b8853d8f1c7ce0020 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 16:57:09 +0200 Subject: [PATCH 06/11] Make an imbalanced dataset with a deterministic balance. 
--- .../tests/test_gradient_boosting.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 155c20edb5099..6a9e95e91efae 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -211,8 +211,14 @@ def test_small_trainset(GradientBoosting, data): def test_stratification_small_trainset(): # Make sure that the small trainset is stratified - X, y = make_classification(n_samples=20000, n_features=2, - n_informative=2, n_redundant=0) + n_samples = 20000 + class_one_prop = 0.1 + rng = np.random.RandomState(42) + X = rng.randn(n_samples).reshape(n_samples, 1) + y = np.asarray( + [0] * int(n_samples * (1 - class_one_prop)) + + [1] * int(n_samples * class_one_prop) + ) gb = HistGradientBoostingClassifier(random_state=42) X_small_train, y_small_train = gb._get_small_trainset(X, y, seed=42) - np.testing.assert_almost_equal(y.mean(), y_small_train.mean(), decimal=3) + np.testing.assert_equal(y_small_train.mean(), class_one_prop) From 4a02b0127391ebe3ad00053237e4949caff09387 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 17:13:48 +0200 Subject: [PATCH 07/11] Use assert instead of assert_equal --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 6a9e95e91efae..ea5457e9f9f6e 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -221,4 +221,4 @@ def test_stratification_small_trainset(): ) gb = HistGradientBoostingClassifier(random_state=42) X_small_train, y_small_train = gb._get_small_trainset(X, y, seed=42) - np.testing.assert_equal(y_small_train.mean(), class_one_prop) + assert y_small_train.mean() == class_one_prop From 6ff6fa8de70e20e459b0bf16b4ff1adcbf9cf244 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Mon, 1 Jul 2019 17:26:27 +0200 Subject: [PATCH 08/11] Merge both tests --- .../tests/test_gradient_boosting.py | 35 +++++-------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index ea5457e9f9f6e..2b33095b13a64 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -192,33 +192,16 @@ def test_zero_division_hessians(data): gb.fit(X, y) -@pytest.mark.parametrize('GradientBoosting, data', [ - (HistGradientBoostingClassifier, - make_classification(n_samples=10001, random_state=0, n_features=2, - n_informative=2, n_redundant=0)), - (HistGradientBoostingRegressor, - make_regression(n_samples=10001, random_state=0, n_features=2, - n_informative=2))] -) -def test_small_trainset(GradientBoosting, data): - # Make sure that a small trainset has the expected length (10k samples) - X, y = data - gb = GradientBoosting(random_state=42) - X_small, y_small = gb._get_small_trainset(X, y, seed=42) - assert X_small.shape[0] == 10000 - assert y_small.shape[0] == 10000 - - -def 
test_stratification_small_trainset(): - # Make sure that the small trainset is stratified +def test_small_trainset(): + # Make sure that the small trainset is stratified and has the expected + # length (10k samples) n_samples = 20000 class_one_prop = 0.1 rng = np.random.RandomState(42) X = rng.randn(n_samples).reshape(n_samples, 1) - y = np.asarray( - [0] * int(n_samples * (1 - class_one_prop)) - + [1] * int(n_samples * class_one_prop) - ) - gb = HistGradientBoostingClassifier(random_state=42) - X_small_train, y_small_train = gb._get_small_trainset(X, y, seed=42) - assert y_small_train.mean() == class_one_prop + y = rng.binomial(1, p=0.1, size=n_samples) + gb = HistGradientBoostingClassifier() + X_small, y_small = gb._get_small_trainset(X, y, seed=42) + assert X_small.shape[0] == 10000 + assert y_small.shape[0] == 10000 + assert y_small.mean() == pytest.approx(class_one_prop, rel=0, abs=1e-2) From 98755bed1129a87fbdc6a4f772810ac2fdd1a94c Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 2 Jul 2019 08:49:48 +0200 Subject: [PATCH 09/11] Make the class distribution deterministic and add more classes --- .../tests/test_gradient_boosting.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 2b33095b13a64..93ca437f7fdeb 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -7,6 +7,7 @@ from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper +from sklearn.utils import shuffle X_classification, y_classification = make_classification(random_state=0) @@ -196,12 +197,26 @@ def test_small_trainset(): # Make sure that the small trainset is stratified and has the expected # length (10k samples) n_samples = 20000 - class_one_prop = 0.1 + original_prop = {0: 0.1, 1: 0.2, 2: 0.3, 3: 0.4} rng = np.random.RandomState(42) X = rng.randn(n_samples).reshape(n_samples, 1) - y = rng.binomial(1, p=0.1, size=n_samples) + y = [[class_] * int(prop * n_samples) for (class_, prop) + in original_prop.items()] + y = shuffle(np.concatenate(y)) gb = HistGradientBoostingClassifier() + + # Compute the small training set X_small, y_small = gb._get_small_trainset(X, y, seed=42) + + # Compute the class distribution in the small training set + unique, counts = np.unique(y_small, return_counts=True) + small_prop = {class_: count / 10000 for (class_, count) + in zip(unique, counts)} + + # Test that the small training set has the expected length assert X_small.shape[0] == 10000 assert y_small.shape[0] == 10000 - assert y_small.mean() == pytest.approx(class_one_prop, rel=0, abs=1e-2) + + # Test that the class distributions in the whole dataset and in the small + # training set are identical + assert small_prop == original_prop From dedfa4b64ea6ba656d5a3759aa1f3630fa210567 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 2 Jul 2019 17:00:10 +0200 Subject: [PATCH 10/11] Use pytest.approx to compare the dictionaries --- .../tests/test_gradient_boosting.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 
93ca437f7fdeb..8c3f2188b7434 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -197,11 +197,11 @@ def test_small_trainset(): # Make sure that the small trainset is stratified and has the expected # length (10k samples) n_samples = 20000 - original_prop = {0: 0.1, 1: 0.2, 2: 0.3, 3: 0.4} + original_distrib = {0: 0.1, 1: 0.2, 2: 0.3, 3: 0.4} rng = np.random.RandomState(42) X = rng.randn(n_samples).reshape(n_samples, 1) y = [[class_] * int(prop * n_samples) for (class_, prop) - in original_prop.items()] + in original_distrib.items()] y = shuffle(np.concatenate(y)) gb = HistGradientBoostingClassifier() @@ -210,8 +210,8 @@ def test_small_trainset(): # Compute the class distribution in the small training set unique, counts = np.unique(y_small, return_counts=True) - small_prop = {class_: count / 10000 for (class_, count) - in zip(unique, counts)} + small_distrib = {class_: count / 10000 for (class_, count) + in zip(unique, counts)} # Test that the small training set has the expected length assert X_small.shape[0] == 10000 @@ -219,4 +219,4 @@ def test_small_trainset(): # Test that the class distributions in the whole dataset and in the small # training set are identical - assert small_prop == original_prop + assert small_distrib == pytest.approx(original_distrib) From 92de4948f7f07e44de40e11aa84584623593120a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 8 Jul 2019 09:53:25 +0200 Subject: [PATCH 11/11] DOC add changelog entry for stratified GBDT training loss --- doc/whats_new/v0.22.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 1089284a9f6a9..1aea9bba2ce8c 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -60,6 +60,11 @@ Changelog parameter called `warm_start` that enables warm starting. :pr:`14012` by :user:`Johann Faouzi `. +- |Enhancement| :class:`ensemble.HistGradientBoostingClassifier` the training + loss or score is now monitored on a class-wise stratified subsample to + preserve the class balance of the original training set. :pr:`14194` + by :user:`Johann Faouzi `. + :mod:`sklearn.linear_model` ...........................
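
For reference, below is a minimal standalone sketch of the stratified subsampling strategy that patches 01-02 introduce and that the tests added in patches 05-10 exercise. The free function `get_small_trainset` is purely illustrative (it mirrors the private `_get_small_trainset` method patched above and is not part of the patch series itself); the `stratify_on_y` flag stands in for the `is_classifier(self)` check in the real code.

import numpy as np
from sklearn.utils import resample

def get_small_trainset(X, y, seed, subsample_size=10000, stratify_on_y=True):
    # Return at most `subsample_size` rows; stratify on y (classifiers only)
    # so the class balance of the full training set is preserved.
    if X.shape[0] <= subsample_size:
        return X, y
    indices = np.arange(X.shape[0])
    stratify = y if stratify_on_y else None
    indices = resample(indices, n_samples=subsample_size, replace=False,
                       random_state=seed, stratify=stratify)
    return np.ascontiguousarray(X[indices]), y[indices]

# The subsample keeps the original class proportions (10% of class 1 here):
rng = np.random.RandomState(42)
X = rng.randn(20000, 1)
y = np.concatenate([np.zeros(18000), np.ones(2000)])
X_small, y_small = get_small_trainset(X, y, seed=42)
print(X_small.shape, round(y_small.mean(), 3))  # (10000, 1) ~0.1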