diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py
index 1fd9e16ba0748..1de3900b3f89c 100644
--- a/sklearn/externals/_lobpcg.py
+++ b/sklearn/externals/_lobpcg.py
@@ -1,5 +1,5 @@
 """
-scikit-learn copy of scipy/sparse/linalg/eigen/lobpcg/lobpcg.py v1.7.1
+scikit-learn copy of scipy/sparse/linalg/eigen/lobpcg/lobpcg.py v1.8.0
 to be deleted after scipy 1.3.0 becomes a dependency in scikit-learn
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG).
@@ -20,13 +20,14 @@
 https://github.com/lobpcg/blopex
 """
 
+import warnings
 import numpy as np
-from scipy.linalg import (inv, eigh, cho_factor, cho_solve, cholesky,
-                          LinAlgError)
+from scipy.linalg import (inv, eigh, cho_factor, cho_solve,
+                          cholesky, LinAlgError)
 from scipy.sparse.linalg import aslinearoperator
 from numpy import block as bmat
 
-__all__ = ['lobpcg']
+__all__ = ["lobpcg"]
 
 
 def _report_nonhermitian(M, name):
@@ -36,15 +37,15 @@ def _report_nonhermitian(M, name):
     from scipy.linalg import norm
 
     md = M - M.T.conj()
-
     nmd = norm(md, 1)
     tol = 10 * np.finfo(M.dtype).eps
     tol = max(tol, tol * norm(M, 1))
     if nmd > tol:
-        print('matrix %s of the type %s is not sufficiently Hermitian:'
-              % (name, M.dtype))
-        print('condition: %.e < %e' % (nmd, tol))
-
+        warnings.warn(
+            f"Matrix {name} of the type {M.dtype} is not Hermitian: "
+            f"condition: {nmd} < {tol} fails.",
+            UserWarning, stacklevel=4
+        )
 
 def _as2d(ar):
     """
@@ -69,7 +70,7 @@ def _makeOperator(operatorInput, expectedShape):
         operator = aslinearoperator(operatorInput)
 
     if operator.shape != expectedShape:
-        raise ValueError('operator has invalid shape')
+        raise ValueError("operator has invalid shape")
 
     return operator
 
@@ -83,7 +84,7 @@ def _applyConstraints(blockVectorV, factYBY, blockVectorBY, blockVectorY):
 
 def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False):
     """B-orthonormalize the given block vector using Cholesky."""
-    normalization = blockVectorV.max(axis=0)+np.finfo(blockVectorV.dtype).eps
+    normalization = blockVectorV.max(axis=0) + np.finfo(blockVectorV.dtype).eps
     blockVectorV = blockVectorV / normalization
     if blockVectorBV is None:
         if B is not None:
@@ -92,20 +93,20 @@ def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False):
             blockVectorBV = blockVectorV  # Shared data!!!
     else:
         blockVectorBV = blockVectorBV / normalization
-    VBV = np.matmul(blockVectorV.T.conj(), blockVectorBV)
+    VBV = blockVectorV.T.conj() @ blockVectorBV
     try:
         # VBV is a Cholesky factor from now on...
         VBV = cholesky(VBV, overwrite_a=True)
         VBV = inv(VBV, overwrite_a=True)
-        blockVectorV = np.matmul(blockVectorV, VBV)
+        blockVectorV = blockVectorV @ VBV
         # blockVectorV = (cho_solve((VBV.T, True), blockVectorV.T)).T
         if B is not None:
-            blockVectorBV = np.matmul(blockVectorBV, VBV)
+            blockVectorBV = blockVectorBV @ VBV
             # blockVectorBV = (cho_solve((VBV.T, True), blockVectorBV.T)).T
         else:
             blockVectorBV = None
     except LinAlgError:
-        #raise ValueError('Cholesky has failed')
+        # raise ValueError('Cholesky has failed')
         blockVectorV = None
         blockVectorBV = None
         VBV = None
@@ -120,18 +121,26 @@ def _get_indx(_lambda, num, largest):
     """Get `num` indices into `_lambda` depending on `largest` option."""
     ii = np.argsort(_lambda)
     if largest:
-        ii = ii[:-num-1:-1]
+        ii = ii[:-num - 1:-1]
     else:
         ii = ii[:num]
 
     return ii
 
 
-def lobpcg(A, X,
-           B=None, M=None, Y=None,
-           tol=None, maxiter=None,
-           largest=True, verbosityLevel=0,
-           retLambdaHistory=False, retResidualNormsHistory=False):
+def lobpcg(
+    A,
+    X,
+    B=None,
+    M=None,
+    Y=None,
+    tol=None,
+    maxiter=None,
+    largest=True,
+    verbosityLevel=0,
+    retLambdaHistory=False,
+    retResidualNormsHistory=False,
+):
     """Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)
 
     LOBPCG is a preconditioned eigensolver for large symmetric positive
@@ -200,7 +209,7 @@ def lobpcg(A, X,
     It is not that ``n`` should be large for the LOBPCG to work, but rather the
     ratio ``n / m`` should be large. If you call LOBPCG with ``m=1``
     and ``n=10``, it works though ``n`` is small. The method is intended
-    for extremely large ``n / m`` [4]_.
+    for extremely large ``n / m``.
 
     The convergence speed depends basically on two factors:
 
@@ -227,13 +236,7 @@
            (BLOPEX) in hypre and PETSc.  :arxiv:`0705.2626`
 
     .. [3] A. V. Knyazev's C and MATLAB implementations:
-           https://bitbucket.org/joseroman/blopex
-
-    .. [4] S. Yamada, T. Imamura, T. Kano, and M. Machida (2006),
-           High-performance computing for exact numerical approaches to
-           quantum many-body problems on the earth simulator. In Proceedings
-           of the 2006 ACM/IEEE Conference on Supercomputing.
-           :doi:`10.1145/1188455.1188504`
+           https://github.com/lobpcg/blopex
 
     Examples
     --------
@@ -280,7 +283,7 @@
     The preconditioner function is passed to lobpcg as a `LinearOperator`:
 
     >>> M = LinearOperator(matvec=precond, matmat=precond,
-    ...                    shape=(n, n), dtype=float)
+    ...                    shape=(n, n), dtype=np.float64)
 
     Let us now solve the eigenvalue problem for the matrix A:
 
@@ -305,7 +308,7 @@
 
     # Block size.
     if len(blockVectorX.shape) != 2:
-        raise ValueError('expected rank-2 array for argument X')
+        raise ValueError("expected rank-2 array for argument X")
 
     n, sizeX = blockVectorX.shape
 
@@ -335,25 +338,32 @@
         M = _makeOperator(M, (n, n))
 
     if (n - sizeY) < (5 * sizeX):
-        # warn('The problem size is small compared to the block size.' \
-        #        ' Using dense eigensolver instead of LOBPCG.')
+        warnings.warn(
+            f"The problem size {n} minus the constraints size {sizeY} "
+            f"is too small relative to the block size {sizeX}. "
+            f"Using a dense eigensolver instead of LOBPCG.",
+            UserWarning, stacklevel=2
+        )
 
         sizeX = min(sizeX, n)
 
         if blockVectorY is not None:
-            raise NotImplementedError('The dense eigensolver '
-                                      'does not support constraints.')
+            raise NotImplementedError(
+                "The dense eigensolver does not support constraints."
+            )
 
         # Define the closed range of indices of eigenvalues to return.
         if largest:
-            eigvals = (n - sizeX, n-1)
+            eigvals = (n - sizeX, n - 1)
         else:
-            eigvals = (0, sizeX-1)
+            eigvals = (0, sizeX - 1)
 
         A_dense = A(np.eye(n, dtype=A.dtype))
         B_dense = None if B is None else B(np.eye(n, dtype=B.dtype))
 
-        vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals,
+        vals, vecs = eigh(A_dense,
+                          B_dense,
+                          eigvals=eigvals,
                           check_finite=False)
         if largest:
             # Reverse order to be compatible with eigs() in 'LM' mode.
@@ -379,13 +389,15 @@
         # gramYBY is a Cholesky factor from now on...
         gramYBY = cho_factor(gramYBY)
     except LinAlgError as e:
-        raise ValueError('cannot handle linearly dependent constraints') from e
+        raise ValueError("Linearly dependent constraints") from e
 
     _applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY)
 
     ##
     # B-orthonormalize X.
     blockVectorX, blockVectorBX = _b_orthonormalize(B, blockVectorX)
+    if blockVectorX is None:
+        raise ValueError("Linearly dependent initial approximations")
 
     ##
     # Compute the initial Ritz vectors: solve the eigenproblem.
@@ -426,7 +438,8 @@
     while iterationNumber < maxiter:
         iterationNumber += 1
         if verbosityLevel > 0:
-            print('iteration %d' % iterationNumber)
+            print("-"*50)
+            print(f"iteration {iterationNumber}")
 
         if B is not None:
             aux = blockVectorBX * _lambda[np.newaxis, :]
@@ -454,9 +467,9 @@
             break
 
         if verbosityLevel > 0:
-            print('current block size:', currentBlockSize)
-            print('eigenvalue:', _lambda)
-            print('residual norms:', residualNorms)
+            print(f"current block size: {currentBlockSize}")
+            print(f"eigenvalue(s):\n{_lambda}")
+            print(f"residual norm(s):\n{residualNorms}")
         if verbosityLevel > 10:
             print(eigBlockVector)
 
@@ -476,30 +489,43 @@
         # Apply constraints to the preconditioned residuals.
         if blockVectorY is not None:
             _applyConstraints(activeBlockVectorR,
-                              gramYBY, blockVectorBY, blockVectorY)
+                              gramYBY,
+                              blockVectorBY,
+                              blockVectorY)
 
         ##
         # B-orthogonalize the preconditioned residuals to X.
         if B is not None:
-            activeBlockVectorR = activeBlockVectorR - np.matmul(blockVectorX,
-                                    np.matmul(blockVectorBX.T.conj(),
-                                    activeBlockVectorR))
+            activeBlockVectorR = activeBlockVectorR - (
+                blockVectorX @
+                (blockVectorBX.T.conj() @ activeBlockVectorR)
+            )
         else:
-            activeBlockVectorR = activeBlockVectorR - np.matmul(blockVectorX,
-                                    np.matmul(blockVectorX.T.conj(),
-                                    activeBlockVectorR))
+            activeBlockVectorR = activeBlockVectorR - (
+                blockVectorX @
+                (blockVectorX.T.conj() @ activeBlockVectorR)
+            )
 
         ##
         # B-orthonormalize the preconditioned residuals.
         aux = _b_orthonormalize(B, activeBlockVectorR)
         activeBlockVectorR, activeBlockVectorBR = aux
+        if activeBlockVectorR is None:
+            warnings.warn(
+                f"Failed at iteration {iterationNumber} with accuracies "
+                f"{residualNorms}\n not reaching the requested "
+                f"tolerance {residualTolerance}.",
+                UserWarning, stacklevel=2
+            )
+            break
 
         activeBlockVectorAR = A(activeBlockVectorR)
 
         if iterationNumber > 0:
             if B is not None:
-                aux = _b_orthonormalize(B, activeBlockVectorP,
-                                        activeBlockVectorBP, retInvR=True)
+                aux = _b_orthonormalize(
+                    B, activeBlockVectorP, activeBlockVectorBP, retInvR=True
+                )
                 activeBlockVectorP, activeBlockVectorBP, invR, normal = aux
             else:
                 aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True)
@@ -516,9 +542,9 @@
         # Perform the Rayleigh Ritz Procedure:
         # Compute symmetric Gram matrices:
 
-        if activeBlockVectorAR.dtype == 'float32':
+        if activeBlockVectorAR.dtype == "float32":
             myeps = 1
-        elif activeBlockVectorR.dtype == 'float32':
+        elif activeBlockVectorR.dtype == "float32":
             myeps = 1e-4
         else:
             myeps = 1e-8
@@ -541,9 +567,9 @@
             gramRAR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR)
 
         if explicitGramFlag:
-            gramRAR = (gramRAR + gramRAR.T.conj())/2
+            gramRAR = (gramRAR + gramRAR.T.conj()) / 2
             gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)
-            gramXAX = (gramXAX + gramXAX.T.conj())/2
+            gramXAX = (gramXAX + gramXAX.T.conj()) / 2
             gramXBX = np.dot(blockVectorX.T.conj(), blockVectorBX)
             gramRBR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBR)
             gramXBR = np.dot(blockVectorX.T.conj(), activeBlockVectorBR)
@@ -555,12 +581,12 @@
 
         def _handle_gramA_gramB_verbosity(gramA, gramB):
             if verbosityLevel > 0:
-                _report_nonhermitian(gramA, 'gramA')
-                _report_nonhermitian(gramB, 'gramB')
+                _report_nonhermitian(gramA, "gramA")
+                _report_nonhermitian(gramB, "gramB")
            if verbosityLevel > 10:
                 # Note: not documented, but leave it in here for now
-                np.savetxt('gramA.txt', gramA)
-                np.savetxt('gramB.txt', gramB)
+                np.savetxt("gramA.txt", gramA)
+                np.savetxt("gramB.txt", gramB)
 
         if not restart:
             gramXAP = np.dot(blockVectorX.T.conj(), activeBlockVectorAP)
@@ -569,46 +595,54 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
             gramXBP = np.dot(blockVectorX.T.conj(), activeBlockVectorBP)
             gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP)
             if explicitGramFlag:
-                gramPAP = (gramPAP + gramPAP.T.conj())/2
+                gramPAP = (gramPAP + gramPAP.T.conj()) / 2
                 gramPBP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorBP)
             else:
                 gramPBP = ident
 
-            gramA = bmat([[gramXAX, gramXAR, gramXAP],
-                          [gramXAR.T.conj(), gramRAR, gramRAP],
-                          [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP]])
-            gramB = bmat([[gramXBX, gramXBR, gramXBP],
-                          [gramXBR.T.conj(), gramRBR, gramRBP],
-                          [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP]])
+            gramA = bmat(
+                [
+                    [gramXAX, gramXAR, gramXAP],
+                    [gramXAR.T.conj(), gramRAR, gramRAP],
+                    [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP],
+                ]
+            )
+            gramB = bmat(
+                [
+                    [gramXBX, gramXBR, gramXBP],
+                    [gramXBR.T.conj(), gramRBR, gramRBP],
+                    [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP],
+                ]
+            )
 
             _handle_gramA_gramB_verbosity(gramA, gramB)
 
             try:
-                _lambda, eigBlockVector = eigh(gramA, gramB,
+                _lambda, eigBlockVector = eigh(gramA,
+                                               gramB,
                                                check_finite=False)
             except LinAlgError:
                 # try again after dropping the direction vectors P from RR
                 restart = True
 
         if restart:
-            gramA = bmat([[gramXAX, gramXAR],
-                          [gramXAR.T.conj(), gramRAR]])
-            gramB = bmat([[gramXBX, gramXBR],
-                          [gramXBR.T.conj(), gramRBR]])
+            gramA = bmat([[gramXAX, gramXAR], [gramXAR.T.conj(), gramRAR]])
+            gramB = bmat([[gramXBX, gramXBR], [gramXBR.T.conj(), gramRBR]])
 
             _handle_gramA_gramB_verbosity(gramA, gramB)
 
             try:
-                _lambda, eigBlockVector = eigh(gramA, gramB,
+                _lambda, eigBlockVector = eigh(gramA,
+                                               gramB,
                                                check_finite=False)
             except LinAlgError as e:
-                raise ValueError('eigh has failed in lobpcg iterations') from e
+                raise ValueError("eigh has failed in lobpcg iterations") from e
 
         ii = _get_indx(_lambda, sizeX, largest)
         if verbosityLevel > 10:
             print(ii)
-            print(_lambda)
+            print(f"lambda:\n{_lambda}")
 
         _lambda = _lambda[ii]
         eigBlockVector = eigBlockVector[:, ii]
@@ -616,12 +650,12 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
             lambdaHistory.append(_lambda)
 
         if verbosityLevel > 10:
-            print('lambda:', _lambda)
-#         # Normalize eigenvectors!
-#         aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )
-#         eigVecNorms = np.sqrt( aux )
-#         eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]
-#         eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )
+            print(f"lambda:\n{_lambda}")
+        # # Normalize eigenvectors!
+        # aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )
+        # eigVecNorms = np.sqrt( aux )
+        # eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]
+        # eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )
 
         if verbosityLevel > 10:
             print(eigBlockVector)
@@ -630,8 +664,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
        if B is not None:
             if not restart:
                 eigBlockVectorX = eigBlockVector[:sizeX]
-                eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]
-                eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]
+                eigBlockVectorR = eigBlockVector[sizeX:
+                                                 sizeX + currentBlockSize]
+                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]
 
                 pp = np.dot(activeBlockVectorR, eigBlockVectorR)
                 pp += np.dot(activeBlockVectorP, eigBlockVectorP)
@@ -663,8 +698,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
         else:
             if not restart:
                 eigBlockVectorX = eigBlockVector[:sizeX]
-                eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]
-                eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]
+                eigBlockVectorR = eigBlockVector[sizeX:
+                                                 sizeX + currentBlockSize]
+                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]
 
                 pp = np.dot(activeBlockVectorR, eigBlockVectorR)
                 pp += np.dot(activeBlockVectorP, eigBlockVectorP)
@@ -698,14 +734,22 @@ def _handle_gramA_gramB_verbosity(gramA, gramB):
     aux = np.sum(blockVectorR.conj() * blockVectorR, 0)
     residualNorms = np.sqrt(aux)
 
+    if np.max(residualNorms) > residualTolerance:
+        warnings.warn(
+            f"Exited at iteration {iterationNumber} with accuracies \n"
+            f"{residualNorms}\n"
+            f"not reaching the requested tolerance {residualTolerance}.",
+            UserWarning, stacklevel=2
+        )
+
     # Future work: Need to add Postprocessing here:
     # Making sure eigenvectors "exactly" satisfy the blockVectorY constraints?
     # Making sure eigenvectors are "exactly" orthonormalized by final "exact" RR
-    # Computing the actual true residuals
+    # Keeping the best iterates in case of divergence
 
     if verbosityLevel > 0:
-        print('final eigenvalue:', _lambda)
-        print('final residual norms:', residualNorms)
+        print(f"Final eigenvalue(s):\n{_lambda}")
+        print(f"Final residual norm(s):\n{residualNorms}")
 
     if retLambdaHistory:
         if retResidualNormsHistory:
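
Reviewer note (not part of the patch): the main behavioral change above is that non-convergence and the near-Hermitian check now surface as `UserWarning` via `warnings.warn` instead of bare `print` calls. Below is a minimal sketch of how to observe the new warning, assuming the vendored module path from this diff; the diagonal test matrix, seed, and deliberately tiny `maxiter` are illustrative choices, not part of the patch:

    import warnings
    import numpy as np
    from sklearn.externals._lobpcg import lobpcg

    # Illustrative SPD test problem: a diagonal matrix with known eigenvalues.
    n = 100
    A = np.diag(np.arange(1, n + 1, dtype=np.float64))
    rng = np.random.default_rng(0)
    X = rng.standard_normal((n, 3))  # block of 3 starting vectors

    # maxiter is kept deliberately small so the patch's new
    # non-convergence UserWarning is easy to trigger and inspect.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        eigenvalues, eigenvectors = lobpcg(A, X, largest=True, maxiter=2)

    for w in caught:
        print(w.category.__name__, ":", w.message)

Since the file header says this is a copy of scipy's lobpcg, the same call should behave identically against `scipy.sparse.linalg.lobpcg` from SciPy 1.8.0.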