diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py
index 1fd9e16ba0748..1de3900b3f89c 100644
--- a/sklearn/externals/_lobpcg.py
+++ b/sklearn/externals/_lobpcg.py
@@ -1,5 +1,5 @@
 """
-scikit-learn copy of scipy/sparse/linalg/eigen/lobpcg/lobpcg.py v1.7.1
+scikit-learn copy of scipy/sparse/linalg/eigen/lobpcg/lobpcg.py v1.8.0
 to be deleted after scipy 1.3.0 becomes a dependency in scikit-learn
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG).
@@ -20,13 +20,14 @@
 https://github.com/lobpcg/blopex
 """
 
+import warnings
 import numpy as np
-from scipy.linalg import (inv, eigh, cho_factor, cho_solve, cholesky,
-                          LinAlgError)
+from scipy.linalg import (inv, eigh, cho_factor, cho_solve,
+                          cholesky, LinAlgError)
 from scipy.sparse.linalg import aslinearoperator
 from numpy import block as bmat
 
-__all__ = ['lobpcg']
+__all__ = ["lobpcg"]
 
 
 def _report_nonhermitian(M, name):
@@ -36,15 +37,15 @@ def _report_nonhermitian(M, name):
     from scipy.linalg import norm
 
     md = M - M.T.conj()
-
     nmd = norm(md, 1)
     tol = 10 * np.finfo(M.dtype).eps
     tol = max(tol, tol * norm(M, 1))
     if nmd > tol:
-        print('matrix %s of the type %s is not sufficiently Hermitian:'
-              % (name, M.dtype))
-        print('condition: %.e < %e' % (nmd, tol))
-
+        warnings.warn(
+            f"Matrix {name} of the type {M.dtype} is not Hermitian: "
+            f"condition: {nmd} < {tol} fails.",
+            UserWarning, stacklevel=4
+        )
 
 def _as2d(ar):
     """
@@ -69,7 +70,7 @@ def _makeOperator(operatorInput, expectedShape):
         operator = aslinearoperator(operatorInput)
 
     if operator.shape != expectedShape:
-        raise ValueError('operator has invalid shape')
+        raise ValueError("operator has invalid shape")
 
     return operator
 
@@ -83,7 +84,7 @@ def _applyConstraints(blockVectorV, factYBY, blockVectorBY, blockVectorY):
 
 def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False):
     """B-orthonormalize the given block vector using Cholesky."""
-    normalization = blockVectorV.max(axis=0)+np.finfo(blockVectorV.dtype).eps
+    normalization = blockVectorV.max(axis=0) + np.finfo(blockVectorV.dtype).eps
     blockVectorV = blockVectorV / normalization
     if blockVectorBV is None:
         if B is not None:
@@ -92,20 +93,20 @@ def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False):
             blockVectorBV = blockVectorV  # Shared data!!!
     else:
         blockVectorBV = blockVectorBV / normalization
-    VBV = np.matmul(blockVectorV.T.conj(), blockVectorBV)
+    VBV = blockVectorV.T.conj() @ blockVectorBV
     try:
         # VBV is a Cholesky factor from now on...
         VBV = cholesky(VBV, overwrite_a=True)
         VBV = inv(VBV, overwrite_a=True)
-        blockVectorV = np.matmul(blockVectorV, VBV)
+        blockVectorV = blockVectorV @ VBV
         # blockVectorV = (cho_solve((VBV.T, True), blockVectorV.T)).T
         if B is not None:
-            blockVectorBV = np.matmul(blockVectorBV, VBV)
+            blockVectorBV = blockVectorBV @ VBV
             # blockVectorBV = (cho_solve((VBV.T, True), blockVectorBV.T)).T
         else:
             blockVectorBV = None
     except LinAlgError:
-        #raise ValueError('Cholesky has failed')
+        # raise ValueError('Cholesky has failed')
         blockVectorV = None
         blockVectorBV = None
         VBV = None
@@ -120,18 +121,26 @@ def _get_indx(_lambda, num, largest):
     """Get `num` indices into `_lambda` depending on `largest` option."""
     ii = np.argsort(_lambda)
     if largest:
-        ii = ii[:-num-1:-1]
+        ii = ii[:-num - 1:-1]
     else:
         ii = ii[:num]
 
     return ii
 
 
-def lobpcg(A, X,
-           B=None, M=None, Y=None,
-           tol=None, maxiter=None,
-           largest=True, verbosityLevel=0,
-           retLambdaHistory=False, retResidualNormsHistory=False):
+def lobpcg(
+    A,
+    X,
+    B=None,
+    M=None,
+    Y=None,
+    tol=None,
+    maxiter=None,
+    largest=True,
+    verbosityLevel=0,
+    retLambdaHistory=False,
+    retResidualNormsHistory=False,
+):
     """Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)
 
     LOBPCG is a preconditioned eigensolver for large symmetric positive
@@ -200,7 +209,7 @@ def lobpcg(A, X,
     It is not that ``n`` should be large for the LOBPCG to work, but rather the
     ratio ``n / m`` should be large. If you call LOBPCG with ``m=1``
     and ``n=10``, it works though ``n`` is small. The method is intended
-    for extremely large ``n / m`` [4]_.
+    for extremely large ``n / m``.
 
     The convergence speed depends basically on two factors:
 
@@ -227,13 +236,7 @@
            (BLOPEX) in hypre and PETSc.  :arxiv:`0705.2626`
 
     .. [3] A. V. Knyazev's C and MATLAB implementations:
-           https://bitbucket.org/joseroman/blopex
-
-    .. [4] S. Yamada, T. Imamura, T. Kano, and M. Machida (2006),
-           High-performance computing for exact numerical approaches to
-           quantum many-body problems on the earth simulator. In Proceedings
-           of the 2006 ACM/IEEE Conference on Supercomputing.
-           :doi:`10.1145/1188455.1188504`
+           https://github.com/lobpcg/blopex
 
     Examples
     --------
@@ -280,7 +283,7 @@
     The preconditioner function is passed to lobpcg as a `LinearOperator`:
 
     >>> M = LinearOperator(matvec=precond, matmat=precond,
-    ...                    shape=(n, n), dtype=float)
+    ...                    shape=(n, n), dtype=np.float64)
 
     Let us now solve the eigenvalue problem for the matrix A:
 
@@ -305,7 +308,7 @@
 
     # Block size.
     if len(blockVectorX.shape) != 2:
-        raise ValueError('expected rank-2 array for argument X')
+        raise ValueError("expected rank-2 array for argument X")
 
     n, sizeX = blockVectorX.shape
 
@@ -335,25 +338,32 @@
         M = _makeOperator(M, (n, n))
 
     if (n - sizeY) < (5 * sizeX):
-        # warn('The problem size is small compared to the block size.' \
-        #        ' Using dense eigensolver instead of LOBPCG.')
+        warnings.warn(
+            f"The problem size {n} minus the constraints size {sizeY} "
+            f"is too small relative to the block size {sizeX}. "
+            f"Using a dense eigensolver instead of LOBPCG.",
+            UserWarning, stacklevel=2
+        )
 
         sizeX = min(sizeX, n)
 
         if blockVectorY is not None:
-            raise NotImplementedError('The dense eigensolver '
-                                      'does not support constraints.')
+            raise NotImplementedError(
+                "The dense eigensolver does not support constraints."
+            )
 
         # Define the closed range of indices of eigenvalues to return.
         if largest:
-            eigvals = (n - sizeX, n-1)
+            eigvals = (n - sizeX, n - 1)
         else:
-            eigvals = (0, sizeX-1)
+            eigvals = (0, sizeX - 1)
 
         A_dense = A(np.eye(n, dtype=A.dtype))
         B_dense = None if B is None else B(np.eye(n, dtype=B.dtype))
 
-        vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals,
+        vals, vecs = eigh(A_dense,
+                          B_dense,
+                          eigvals=eigvals,
                           check_finite=False)
         if largest:
             # Reverse order to be compatible with eigs() in 'LM' mode.
@@ -379,13 +389,15 @@
         # gramYBY is a Cholesky factor from now on...
         gramYBY = cho_factor(gramYBY)
     except LinAlgError as e:
-        raise ValueError('cannot handle linearly dependent constraints') from e
+        raise ValueError("Linearly dependent constraints") from e
 
     _applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY)
 
     ##
     # B-orthonormalize X.
     blockVectorX, blockVectorBX = _b_orthonormalize(B, blockVectorX)
+    if blockVectorX is None:
+        raise ValueError("Linearly dependent initial approximations")
 
     ##
     # Compute the initial Ritz vectors: solve the eigenproblem.
@@ -426,7 +438,8 @@
     while iterationNumber < maxiter:
         iterationNumber += 1
         if verbosityLevel > 0:
-            print('iteration %d' % iterationNumber)
+            print("-"*50)
+            print(f"iteration {iterationNumber}")
 
         if B is not None:
             aux = blockVectorBX * _lambda[np.newaxis, :]
@@ -454,9 +467,9 @@
             break
 
         if verbosityLevel > 0:
-            print('current block size:', currentBlockSize)
-            print('eigenvalue:', _lambda)
-            print('residual norms:', residualNorms)
+            print(f"current block size: {currentBlockSize}")
+            print(f"eigenvalue(s):\n{_lambda}")
+            print(f"residual norm(s):\n{residualNorms}")
         if verbosityLevel > 10:
             print(eigBlockVector)
 
@@ -476,30 +489,43 @@
         # Apply constraints to the preconditioned residuals.
         if blockVectorY is not None:
             _applyConstraints(activeBlockVectorR,
-                              gramYBY, blockVectorBY, blockVectorY)
+                              gramYBY,
+                              blockVectorBY,
+                              blockVectorY)
 
         ##
         # B-orthogonalize the preconditioned residuals to X.
         if B is not None:
-            activeBlockVectorR = activeBlockVectorR - np.matmul(blockVectorX,
-                                    np.matmul(blockVectorBX.T.conj(),
-                                    activeBlockVectorR))
+            activeBlockVectorR = activeBlockVectorR - (
+                blockVectorX @
+                (blockVectorBX.T.conj() @ activeBlockVectorR)
+            )
         else:
-            activeBlockVectorR = activeBlockVectorR - np.matmul(blockVectorX,
-                                    np.matmul(blockVectorX.T.conj(),
-                                    activeBlockVectorR))
+            activeBlockVectorR = activeBlockVectorR - (
+                blockVectorX @
+                (blockVectorX.T.conj() @ activeBlockVectorR)
+            )
 
         ##
         # B-orthonormalize the preconditioned residuals.
         aux = _b_orthonormalize(B, activeBlockVectorR)
         activeBlockVectorR, activeBlockVectorBR = aux
+        if activeBlockVectorR is None:
+            warnings.warn(
+                f"Failed at iteration {iterationNumber} with accuracies "
+                f"{residualNorms}\n not reaching the requested "
+                f"tolerance {residualTolerance}.",
+                UserWarning, stacklevel=2
+            )
+            break
 
         activeBlockVectorAR = A(activeBlockVectorR)
 
         if iterationNumber > 0:
             if B is not None:
-                aux = _b_orthonormalize(B, activeBlockVectorP,
-                                        activeBlockVectorBP, retInvR=True)
+                aux = _b_orthonormalize(
+                    B, activeBlockVectorP, activeBlockVectorBP, retInvR=True
+                )
                 activeBlockVectorP, activeBlockVectorBP, invR, normal = aux
             else:
                 aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True)
@@ -516,9 +542,9 @@
         # Perform the Rayleigh Ritz Procedure:
         # Compute symmetric Gram matrices:
 
-        if activeBlockVectorAR.dtype == 'float32':
+        if activeBlockVectorAR.dtype == "float32":
             myeps = 1
-        elif activeBlockVectorR.dtype == 'float32':
+        elif activeBlockVectorR.dtype == "float32":
             myeps = 1e-4
         else:
             myeps = 1e-8
@@ -541,9 +567,9 @@
             gramRAR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR)
 
         if explicitGramFlag:
-            gramRAR = (gramRAR + gramRAR.T.conj())/2
+            gramRAR = (gramRAR + gramRAR.T.conj()) / 2
             gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)
-            gramXAX = (gramXAX + gramXAX.T.conj())/2
+            gramXAX = (gramXAX + gramXAX.T.conj()) / 2
             gramXBX = np.dot(blockVectorX.T.conj(), blockVectorBX)
             gramRBR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBR)
             gramXBR = np.dot(blockVectorX.T.conj(), activeBlockVectorBR)
@@ -555,12 +581,12 @@
 
         def _handle_gramA_gramB_verbosity(gramA, gramB):
             if verbosityLevel > 0:
-                _report_nonhermitian(gramA, 'gramA')
-                _report_nonhermitian(gramB, 'gramB')
+                _report_nonhermitian(gramA, "gramA")
+                _report_nonhermitian(gramB, "gramB")
            if verbosityLevel > 10:
                 # Note: not documented, but leave it in here for now
-                np.savetxt('gramA.txt', gramA)
-                np.savetxt('gramB.txt', gramB)
+                np.savetxt("gramA.txt", gramA)
+                np.savetxt("gramB.txt", gramB)
 
         if not restart:
             gramXAP = np.dot(blockVectorX.T.conj(), activeBlockVectorAP)
@@ -569,46 +595,54 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
             gramXBP = np.dot(blockVectorX.T.conj(), activeBlockVectorBP)
             gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP)
             if explicitGramFlag:
-                gramPAP = (gramPAP + gramPAP.T.conj())/2
+                gramPAP = (gramPAP + gramPAP.T.conj()) / 2
                 gramPBP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorBP)
             else:
                 gramPBP = ident
 
-            gramA = bmat([[gramXAX, gramXAR, gramXAP],
-                          [gramXAR.T.conj(), gramRAR, gramRAP],
-                          [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP]])
-            gramB = bmat([[gramXBX, gramXBR, gramXBP],
-                          [gramXBR.T.conj(), gramRBR, gramRBP],
-                          [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP]])
+            gramA = bmat(
+                [
+                    [gramXAX, gramXAR, gramXAP],
+                    [gramXAR.T.conj(), gramRAR, gramRAP],
+                    [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP],
+                ]
+            )
+            gramB = bmat(
+                [
+                    [gramXBX, gramXBR, gramXBP],
+                    [gramXBR.T.conj(), gramRBR, gramRBP],
+                    [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP],
+                ]
+            )
 
             _handle_gramA_gramB_verbosity(gramA, gramB)
 
             try:
-                _lambda, eigBlockVector = eigh(gramA, gramB,
+                _lambda, eigBlockVector = eigh(gramA,
+                                               gramB,
                                                check_finite=False)
             except LinAlgError:
                 # try again after dropping the direction vectors P from RR
                 restart = True
 
         if restart:
-            gramA = bmat([[gramXAX, gramXAR],
-                          [gramXAR.T.conj(), gramRAR]])
-            gramB = bmat([[gramXBX, gramXBR],
-                          [gramXBR.T.conj(), gramRBR]])
+            gramA = bmat([[gramXAX, gramXAR], [gramXAR.T.conj(), gramRAR]])
+            gramB = bmat([[gramXBX, gramXBR], [gramXBR.T.conj(), gramRBR]])
 
             _handle_gramA_gramB_verbosity(gramA, gramB)
 
             try:
-                _lambda, eigBlockVector = eigh(gramA, gramB,
+                _lambda, eigBlockVector = eigh(gramA,
+                                               gramB,
                                                check_finite=False)
             except LinAlgError as e:
-                raise ValueError('eigh has failed in lobpcg iterations') from e
+                raise ValueError("eigh has failed in lobpcg iterations") from e
 
         ii = _get_indx(_lambda, sizeX, largest)
         if verbosityLevel > 10:
             print(ii)
-            print(_lambda)
+            print(f"lambda:\n{_lambda}")
 
         _lambda = _lambda[ii]
         eigBlockVector = eigBlockVector[:, ii]
@@ -616,12 +650,12 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
             lambdaHistory.append(_lambda)
 
         if verbosityLevel > 10:
-            print('lambda:', _lambda)
-#         # Normalize eigenvectors!
-#         aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )
-#         eigVecNorms = np.sqrt( aux )
-#         eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]
-#         eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )
+            print(f"lambda:\n{_lambda}")
+        # # Normalize eigenvectors!
+        # aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )
+        # eigVecNorms = np.sqrt( aux )
+        # eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]
+        # eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )
 
         if verbosityLevel > 10:
             print(eigBlockVector)
@@ -630,8 +664,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
        if B is not None:
             if not restart:
                 eigBlockVectorX = eigBlockVector[:sizeX]
-                eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]
-                eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]
+                eigBlockVectorR = eigBlockVector[sizeX:
+                                                 sizeX + currentBlockSize]
+                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]
 
                 pp = np.dot(activeBlockVectorR, eigBlockVectorR)
                 pp += np.dot(activeBlockVectorP, eigBlockVectorP)
@@ -663,8 +698,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
         else:
             if not restart:
                 eigBlockVectorX = eigBlockVector[:sizeX]
-                eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]
-                eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]
+                eigBlockVectorR = eigBlockVector[sizeX:
+                                                 sizeX + currentBlockSize]
+                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]
 
                 pp = np.dot(activeBlockVectorR, eigBlockVectorR)
                 pp += np.dot(activeBlockVectorP, eigBlockVectorP)
@@ -698,14 +734,22 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
     aux = np.sum(blockVectorR.conj() * blockVectorR, 0)
     residualNorms = np.sqrt(aux)
 
+    if np.max(residualNorms) > residualTolerance:
+        warnings.warn(
+            f"Exited at iteration {iterationNumber} with accuracies \n"
+            f"{residualNorms}\n"
+            f"not reaching the requested tolerance {residualTolerance}.",
+            UserWarning, stacklevel=2
+        )
+
     # Future work: Need to add Postprocessing here:
     # Making sure eigenvectors "exactly" satisfy the blockVectorY constraints?
     # Making sure eigenvectors are "exactly" orthonormalized by final "exact" RR
-    # Computing the actual true residuals
+    # Keeping the best iterates in case of divergence
 
     if verbosityLevel > 0:
-        print('final eigenvalue:', _lambda)
-        print('final residual norms:', residualNorms)
+        print(f"Final eigenvalue(s):\n{_lambda}")
+        print(f"Final residual norm(s):\n{residualNorms}")
 
     if retLambdaHistory:
         if retResidualNormsHistory:
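
Reviewer note (not part of the patch): the main behavioral change above is that non-convergence and the near-Hermitian check now surface as `UserWarning` via `warnings.warn` instead of bare `print` calls. Below is a minimal sketch of how to observe the new warning, assuming the vendored module path from this diff; the diagonal test matrix, seed, and deliberately tiny `maxiter` are illustrative choices, not part of the patch:

    import warnings
    import numpy as np
    from sklearn.externals._lobpcg import lobpcg

    # Illustrative SPD test problem: a diagonal matrix with known eigenvalues.
    n = 100
    A = np.diag(np.arange(1, n + 1, dtype=np.float64))
    rng = np.random.default_rng(0)
    X = rng.standard_normal((n, 3))  # block of 3 starting vectors

    # maxiter is kept deliberately small so the patch's new
    # non-convergence UserWarning is easy to trigger and inspect.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        eigenvalues, eigenvectors = lobpcg(A, X, largest=True, maxiter=2)

    for w in caught:
        print(w.category.__name__, ":", w.message)

Since the file header says this is a copy of scipy's lobpcg, the same call should behave identically against `scipy.sparse.linalg.lobpcg` from SciPy 1.8.0.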