|
2 | 2 | from scipy import sparse
|
3 | 3 |
|
4 | 4 | from sklearn.utils.testing import assert_equal
|
| 5 | +from sklearn.utils.testing import assert_not_equal |
5 | 6 | from sklearn.utils.testing import assert_array_equal
|
6 | 7 | from sklearn.utils.testing import assert_raises
|
7 | 8 | from sklearn.utils.testing import assert_false
|
| 9 | +from sklearn.utils.testing import assert_true |
8 | 10 |
|
9 | 11 | from sklearn.preprocessing.imputation import Imputer
|
10 | 12 | from sklearn.pipeline import Pipeline
|
@@ -250,26 +252,39 @@ def test_imputation_pickle():
|
250 | 252 |
|
251 | 253 |
|
252 | 254 | def test_imputation_copy():
|
253 |
| - """Test imputation with copy=True.""" |
254 |
| - l = 5 |
255 |
| - |
256 |
| - # Test default behaviour and with copy=True |
257 |
| - for params in [{}, {'copy': True}]: |
258 |
| - X = sparse_random_matrix(l, l, density=0.75, random_state=0) |
259 |
| - |
260 |
| - # Dense |
261 |
| - imputer = Imputer(missing_values=0, strategy="mean", **params) |
262 |
| - Xt = imputer.fit(X).transform(X) |
263 |
| - Xt[0, 0] = np.nan |
264 |
| - # Check that the objects are different and that they don't use |
265 |
| - # the same buffer |
266 |
| - assert_false(np.all(X.todense() == Xt)) |
267 |
| - |
268 |
| - # Sparse |
269 |
| - imputer = Imputer(missing_values=0, strategy="mean", **params) |
270 |
| - X = X.todense() |
271 |
| - Xt = imputer.fit(X).transform(X) |
272 |
| - Xt[0, 0] = np.nan |
273 |
| - # Check that the objects are different and that they don't use |
274 |
| - # the same buffer |
275 |
| - assert_false(np.all(X == Xt)) |
| 255 | + """Test imputation with copy""" |
| 256 | + X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0) |
| 257 | + |
| 258 | + # copy=True, dense |
| 259 | + X = X_orig.copy().todense() |
| 260 | + imputer = Imputer(missing_values=0, strategy="mean", copy=True) |
| 261 | + Xt = imputer.fit(X).transform(X) |
| 262 | + Xt[0, 0] = -1 |
| 263 | + assert_false(np.all(X == Xt)) |
| 264 | + assert_not_equal(X.ctypes.data, Xt.ctypes.data) |
| 265 | + |
| 266 | + # copy=True, sparse (NOTE: X is converted to dense via todense() below, so the sparse path is not actually exercised) |
| 267 | + X = X_orig.copy() |
| 268 | + imputer = Imputer(missing_values=0, strategy="mean", copy=True) |
| 269 | + X = X.todense() |
| 270 | + Xt = imputer.fit(X).transform(X) |
| 271 | + Xt[0, 0] = -1 |
| 272 | + assert_false(np.all(X == Xt)) |
| 273 | + assert_not_equal(X.ctypes.data, Xt.ctypes.data) |
| 274 | + |
| 275 | + # copy=False, dense |
| 276 | + X = X_orig.copy().todense() |
| 277 | + imputer = Imputer(missing_values=0, strategy="mean", copy=False) |
| 278 | + Xt = imputer.fit(X).transform(X) |
| 279 | + Xt[0, 0] = -1 |
| 280 | + assert_true(np.all(X == Xt)) |
| 281 | + assert_equal(X.ctypes.data, Xt.ctypes.data) |
| 282 | + |
| 283 | + # copy=False, sparse |
| 284 | + X = X_orig.copy() |
| 285 | + imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=1) |
| 286 | + Xt = imputer.fit(X).transform(X) |
| 287 | + assert_true(np.all(X == Xt)) # FIXME: this assertion currently fails |
| 288 | + |
| 289 | + # Note: If X is sparse and if missing_values=0, then a (dense) copy of X is |
| 290 | + # made, even if copy=False. |
0 commit comments