Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 92bc3cf

Browse files
jjerphan authored and punndcoder28 committed
MAINT Remove soon-to-be deprecated scipy.sparse functions (scikit-learn#26751)
1 parent ea75350 commit 92bc3cf

File tree

25 files changed

+87
-88
lines changed

25 files changed

+87
-88
lines changed

sklearn/cluster/_agglomerative.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -90,11 +90,12 @@ def _fix_connectivity(X, connectivity, affinity):
9090
connectivity = connectivity + connectivity.T
9191

9292
# Convert connectivity matrix to LIL
93-
if not sparse.isspmatrix_lil(connectivity):
94-
if not sparse.isspmatrix(connectivity):
95-
connectivity = sparse.lil_matrix(connectivity)
96-
else:
97-
connectivity = connectivity.tolil()
93+
if not sparse.issparse(connectivity):
94+
connectivity = sparse.lil_matrix(connectivity)
95+
96+
# `connectivity` is a sparse matrix at this point
97+
if connectivity.format != "lil":
98+
connectivity = connectivity.tolil()
9899

99100
# Compute the number of nodes
100101
n_connected_components, labels = connected_components(connectivity)

sklearn/datasets/tests/test_20news.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,15 +64,15 @@ def test_20news_length_consistency(fetch_20newsgroups_fxt):
6464
def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
6565
# test subset = train
6666
bunch = fetch_20newsgroups_vectorized_fxt(subset="train")
67-
assert sp.isspmatrix_csr(bunch.data)
67+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
6868
assert bunch.data.shape == (11314, 130107)
6969
assert bunch.target.shape[0] == 11314
7070
assert bunch.data.dtype == np.float64
7171
assert bunch.DESCR.startswith(".. _20newsgroups_dataset:")
7272

7373
# test subset = test
7474
bunch = fetch_20newsgroups_vectorized_fxt(subset="test")
75-
assert sp.isspmatrix_csr(bunch.data)
75+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
7676
assert bunch.data.shape == (7532, 130107)
7777
assert bunch.target.shape[0] == 7532
7878
assert bunch.data.dtype == np.float64
@@ -84,7 +84,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
8484

8585
# test subset = all
8686
bunch = fetch_20newsgroups_vectorized_fxt(subset="all")
87-
assert sp.isspmatrix_csr(bunch.data)
87+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
8888
assert bunch.data.shape == (11314 + 7532, 130107)
8989
assert bunch.target.shape[0] == 11314 + 7532
9090
assert bunch.data.dtype == np.float64

sklearn/feature_extraction/text.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -917,7 +917,7 @@ def _more_tags(self):
917917

918918
def _document_frequency(X):
919919
"""Count the number of non-zero values for each feature in sparse X."""
920-
if sp.isspmatrix_csr(X):
920+
if sp.issparse(X) and X.format == "csr":
921921
return np.bincount(X.indices, minlength=X.shape[1])
922922
else:
923923
return np.diff(X.indptr)

sklearn/kernel_approximation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -363,7 +363,7 @@ def fit(self, X, y=None):
363363
X = self._validate_data(X, accept_sparse="csr")
364364
random_state = check_random_state(self.random_state)
365365
n_features = X.shape[1]
366-
sparse = sp.isspmatrix(X)
366+
sparse = sp.issparse(X)
367367
if self.gamma == "scale":
368368
# var = E[X^2] - E[X]^2 if sparse
369369
X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()

sklearn/linear_model/_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -839,7 +839,7 @@ def _pre_fit(
839839
"""
840840
n_samples, n_features = X.shape
841841

842-
if sparse.isspmatrix(X):
842+
if sparse.issparse(X):
843843
# copy is not needed here as X is not modified inplace when X is sparse
844844
precompute = False
845845
X, y, X_offset, y_offset, X_scale = _preprocess_data(

sklearn/linear_model/_coordinate_descent.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def _alpha_grid(
137137

138138
sparse_center = False
139139
if Xy is None:
140-
X_sparse = sparse.isspmatrix(X)
140+
X_sparse = sparse.issparse(X)
141141
sparse_center = X_sparse and fit_intercept
142142
X = check_array(
143143
X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse)
@@ -560,7 +560,7 @@ def enet_path(
560560
raise ValueError("positive=True is not allowed for multi-output (y.ndim != 1)")
561561

562562
# MultiTaskElasticNet does not support sparse matrices
563-
if not multi_output and sparse.isspmatrix(X):
563+
if not multi_output and sparse.issparse(X):
564564
if X_offset_param is not None:
565565
# As sparse matrices are not actually centered we need this to be passed to
566566
# the CD solver.
@@ -621,7 +621,7 @@ def enet_path(
621621
# account for n_samples scaling in objectives between here and cd_fast
622622
l1_reg = alpha * l1_ratio * n_samples
623623
l2_reg = alpha * (1.0 - l1_ratio) * n_samples
624-
if not multi_output and sparse.isspmatrix(X):
624+
if not multi_output and sparse.issparse(X):
625625
model = cd_fast.sparse_enet_coordinate_descent(
626626
w=coef_,
627627
alpha=l1_reg,
@@ -1101,7 +1101,7 @@ def _decision_function(self, X):
11011101
The predicted decision function.
11021102
"""
11031103
check_is_fitted(self)
1104-
if sparse.isspmatrix(X):
1104+
if sparse.issparse(X):
11051105
return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
11061106
else:
11071107
return super()._decision_function(X)
@@ -1546,7 +1546,7 @@ def fit(self, X, y, sample_weight=None):
15461546
check_y_params = dict(
15471547
copy=False, dtype=[np.float64, np.float32], ensure_2d=False
15481548
)
1549-
if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
1549+
if isinstance(X, np.ndarray) or sparse.issparse(X):
15501550
# Keep a reference to X
15511551
reference_to_old_X = X
15521552
# Let us not impose fortran ordering so far: it is
@@ -1563,7 +1563,7 @@ def fit(self, X, y, sample_weight=None):
15631563
X, y = self._validate_data(
15641564
X, y, validate_separately=(check_X_params, check_y_params)
15651565
)
1566-
if sparse.isspmatrix(X):
1566+
if sparse.issparse(X):
15671567
if hasattr(reference_to_old_X, "data") and not np.may_share_memory(
15681568
reference_to_old_X.data, X.data
15691569
):
@@ -1598,7 +1598,7 @@ def fit(self, X, y, sample_weight=None):
15981598
)
15991599
y = column_or_1d(y, warn=True)
16001600
else:
1601-
if sparse.isspmatrix(X):
1601+
if sparse.issparse(X):
16021602
raise TypeError("X should be dense but a sparse matrix waspassed")
16031603
elif y.ndim == 1:
16041604
raise ValueError(

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,10 @@ def test_set_order_sparse(order, input_order):
8484
X = X.asformat(sparse_format)
8585
y = X.asformat(sparse_format)
8686
X2, y2 = _set_order(X, y, order=order)
87-
if order == "C":
88-
assert sparse.isspmatrix_csr(X2)
89-
assert sparse.isspmatrix_csr(y2)
90-
elif order == "F":
91-
assert sparse.isspmatrix_csc(X2)
92-
assert sparse.isspmatrix_csc(y2)
87+
88+
format = "csc" if order == "F" else "csr"
89+
assert sparse.issparse(X2) and X2.format == format
90+
assert sparse.issparse(y2) and y2.format == format
9391

9492

9593
def test_lasso_zero():

sklearn/linear_model/tests/test_sparse_coordinate_descent.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def test_sparse_coef():
1919
clf = ElasticNet()
2020
clf.coef_ = [1, 2, 3]
2121

22-
assert sp.isspmatrix(clf.sparse_coef_)
22+
assert sp.issparse(clf.sparse_coef_)
2323
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
2424

2525

sklearn/manifold/_spectral_embedding.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ def _graph_is_connected(graph):
8585
is_connected : bool
8686
True means the graph is fully connected and False means not.
8787
"""
88-
if sparse.isspmatrix(graph):
88+
if sparse.issparse(graph):
8989
# sparse graph, find all the connected components
9090
n_connected_components, _ = connected_components(graph)
9191
return n_connected_components == 1
@@ -118,7 +118,7 @@ def _set_diag(laplacian, value, norm_laplacian):
118118
"""
119119
n_nodes = laplacian.shape[0]
120120
# We need all entries in the diagonal to values
121-
if not sparse.isspmatrix(laplacian):
121+
if not sparse.issparse(laplacian):
122122
if norm_laplacian:
123123
laplacian.flat[:: n_nodes + 1] = value
124124
else:
@@ -280,7 +280,7 @@ def spectral_embedding(
280280
if (
281281
eigen_solver == "arpack"
282282
or eigen_solver != "lobpcg"
283-
and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)
283+
and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
284284
):
285285
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
286286
# for details see the source code in scipy:
@@ -371,7 +371,7 @@ def spectral_embedding(
371371
# see note above under arpack why lobpcg has problems with small
372372
# number of nodes
373373
# lobpcg will fallback to eigh, so we short circuit it
374-
if sparse.isspmatrix(laplacian):
374+
if sparse.issparse(laplacian):
375375
laplacian = laplacian.toarray()
376376
_, diffusion_map = eigh(laplacian, check_finite=False)
377377
embedding = diffusion_map.T[:n_components]

sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from typing import List
33

44
import numpy as np
5-
from scipy.sparse import issparse, isspmatrix_csr
5+
from scipy.sparse import issparse
66

77
from ... import get_config
88
from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64
@@ -100,7 +100,8 @@ def is_numpy_c_ordered(X):
100100

101101
def is_valid_sparse_matrix(X):
102102
return (
103-
isspmatrix_csr(X)
103+
issparse(X)
104+
and X.format == "csr"
104105
and
105106
# TODO: support CSR matrices without non-zeros elements
106107
X.nnz > 0

sklearn/model_selection/tests/test_split.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
coo_matrix,
1111
csc_matrix,
1212
csr_matrix,
13-
isspmatrix_csr,
13+
issparse,
1414
)
1515
from scipy.special import comb
1616

@@ -1355,8 +1355,8 @@ def test_train_test_split_sparse():
13551355
for InputFeatureType in sparse_types:
13561356
X_s = InputFeatureType(X)
13571357
X_train, X_test = train_test_split(X_s)
1358-
assert isspmatrix_csr(X_train)
1359-
assert isspmatrix_csr(X_test)
1358+
assert issparse(X_train) and X_train.format == "csr"
1359+
assert issparse(X_test) and X_test.format == "csr"
13601360

13611361

13621362
def test_train_test_split_mock_pandas():

sklearn/preprocessing/_data.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2394,7 +2394,7 @@ def add_dummy_feature(X, value=1.0):
23942394
n_samples, n_features = X.shape
23952395
shape = (n_samples, n_features + 1)
23962396
if sparse.issparse(X):
2397-
if sparse.isspmatrix_coo(X):
2397+
if X.format == "coo":
23982398
# Shift columns to the right.
23992399
col = X.col + 1
24002400
# Column indices of dummy feature are 0 everywhere.
@@ -2404,7 +2404,7 @@ def add_dummy_feature(X, value=1.0):
24042404
# Prepend the dummy feature n_samples times.
24052405
data = np.concatenate((np.full(n_samples, value), X.data))
24062406
return sparse.coo_matrix((data, (row, col)), shape)
2407-
elif sparse.isspmatrix_csc(X):
2407+
elif X.format == "csc":
24082408
# Shift index pointers since we need to add n_samples elements.
24092409
indptr = X.indptr + n_samples
24102410
# indptr[0] must be 0.

sklearn/preprocessing/_polynomial.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -435,7 +435,7 @@ def transform(self, X):
435435

436436
n_samples, n_features = X.shape
437437
max_int32 = np.iinfo(np.int32).max
438-
if sparse.isspmatrix_csr(X):
438+
if sparse.issparse(X) and X.format == "csr":
439439
if self._max_degree > 3:
440440
return self.transform(X.tocsc()).tocsr()
441441
to_stack = []
@@ -480,9 +480,9 @@ def transform(self, X):
480480
" transformer to produce fewer than 2^31 output features"
481481
)
482482
XP = sparse.hstack(to_stack, dtype=X.dtype, format="csr")
483-
elif sparse.isspmatrix_csc(X) and self._max_degree < 4:
483+
elif sparse.issparse(X) and X.format == "csc" and self._max_degree < 4:
484484
return self.transform(X.tocsr()).tocsc()
485-
elif sparse.isspmatrix(X):
485+
elif sparse.issparse(X):
486486
combinations = self._combinations(
487487
n_features=n_features,
488488
min_degree=self._min_degree,
@@ -1119,8 +1119,7 @@ def transform(self, X):
11191119
XBS[mask, i * n_splines + k] = linear_extr
11201120

11211121
if use_sparse:
1122-
if not sparse.isspmatrix_csr(XBS_sparse):
1123-
XBS_sparse = XBS_sparse.tocsr()
1122+
XBS_sparse = XBS_sparse.tocsr()
11241123
output_list.append(XBS_sparse)
11251124

11261125
if use_sparse:

sklearn/preprocessing/tests/test_data.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1846,7 +1846,7 @@ def test_normalizer_l1():
18461846
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
18471847

18481848
assert X_norm is not X
1849-
assert sparse.isspmatrix_csr(X_norm)
1849+
assert sparse.issparse(X_norm) and X_norm.format == "csr"
18501850

18511851
X_norm = toarray(X_norm)
18521852
for i in range(3):
@@ -1893,7 +1893,7 @@ def test_normalizer_l2():
18931893
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
18941894

18951895
assert X_norm is not X
1896-
assert sparse.isspmatrix_csr(X_norm)
1896+
assert sparse.issparse(X_norm) and X_norm.format == "csr"
18971897

18981898
X_norm = toarray(X_norm)
18991899
for i in range(3):
@@ -1941,7 +1941,7 @@ def test_normalizer_max():
19411941
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
19421942

19431943
assert X_norm is not X
1944-
assert sparse.isspmatrix_csr(X_norm)
1944+
assert sparse.issparse(X_norm) and X_norm.format == "csr"
19451945

19461946
X_norm = toarray(X_norm)
19471947
for i in range(3):
@@ -2205,21 +2205,21 @@ def test_add_dummy_feature():
22052205
def test_add_dummy_feature_coo():
22062206
X = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]])
22072207
X = add_dummy_feature(X)
2208-
assert sparse.isspmatrix_coo(X), X
2208+
assert sparse.issparse(X) and X.format == "coo", X
22092209
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
22102210

22112211

22122212
def test_add_dummy_feature_csc():
22132213
X = sparse.csc_matrix([[1, 0], [0, 1], [0, 1]])
22142214
X = add_dummy_feature(X)
2215-
assert sparse.isspmatrix_csc(X), X
2215+
assert sparse.issparse(X) and X.format == "csc", X
22162216
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
22172217

22182218

22192219
def test_add_dummy_feature_csr():
22202220
X = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
22212221
X = add_dummy_feature(X)
2222-
assert sparse.isspmatrix_csr(X), X
2222+
assert sparse.issparse(X) and X.format == "csr", X
22232223
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
22242224

22252225

sklearn/preprocessing/tests/test_encoders.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ def check_categorical_onehot(X):
240240

241241
assert_allclose(Xtr1.toarray(), Xtr2)
242242

243-
assert sparse.isspmatrix_csr(Xtr1)
243+
assert sparse.issparse(Xtr1) and Xtr1.format == "csr"
244244
return Xtr1.toarray()
245245

246246

0 commit comments

Comments
 (0)