diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index f2d5753d00f86..56d4b4e54ae6b 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -103,7 +103,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True): copy : boolean, optional, default True set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse - CSR matrix and if axis is 1). + CSC matrix and if axis is 1). Notes ----- @@ -113,10 +113,10 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True): Instead the caller is expected to either set explicitly `with_mean=False` (in that case, only variance scaling will be - performed on the features of the CSR matrix) or to call `X.toarray()` + performed on the features of the CSC matrix) or to call `X.toarray()` if he/she expects the materialized dense array to fit in memory. - To avoid memory copy the caller should pass a CSR matrix. + To avoid memory copy the caller should pass a CSC matrix. See also -------- @@ -124,7 +124,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True): scaling using the ``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`) """ - X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False, + X = check_array(X, accept_sparse='csc', copy=copy, ensure_2d=False, warn_on_dtype=True, estimator='the scale function', dtype=FLOAT_DTYPES) if sparse.issparse(X): @@ -135,11 +135,6 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True): if axis != 0: raise ValueError("Can only scale sparse matrix on axis=0, " " got axis=%d" % axis) - if not sparse.isspmatrix_csr(X): - X = X.tocsr() - copy = False - if copy: - X = X.copy() if with_std: _, var = mean_variance_axis(X, axis=0) var = _handle_zeros_in_scale(var, copy=False) @@ -150,8 +145,6 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True): mean_ = np.mean(X, axis) if with_std: scale_ = np.std(X, axis) - if copy: - X = X.copy() # Xr is a view on the original array that enables easy use of # broadcasting on the axis in which we are interested in Xr = np.rollaxis(X, axis) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 5f264f1cf1bf6..baa62cbbaffe2 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -843,7 +843,7 @@ def test_scale_function_without_centering(): # null scale X_csr_scaled = scale(X_csr, with_mean=False, with_std=False, copy=True) - assert_array_almost_equal(X_csr.data, X_csr_scaled.data) + assert_array_almost_equal(X_csr.toarray(), X_csr_scaled.toarray()) def test_robust_scale_axis1():