diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py index ccaa6eeb28e65..918f32e6da3e5 100644 --- a/sklearn/utils/sparsefuncs.py +++ b/sklearn/utils/sparsefuncs.py @@ -467,7 +467,8 @@ def count_nonzero(X, axis=None, sample_weight=None): elif axis == 1: out = np.diff(X.indptr) if sample_weight is None: - return out + # astype here is for consistency with axis=0 dtype + return out.astype('intp') return out * sample_weight elif axis == 0: if sample_weight is None: diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 838435a0deab9..03c0c717d3174 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -443,6 +443,19 @@ def test_count_nonzero(): assert_raises(TypeError, count_nonzero, X_csc) assert_raises(ValueError, count_nonzero, X_csr, axis=2) + assert (count_nonzero(X_csr, axis=0).dtype == + count_nonzero(X_csr, axis=1).dtype) + assert (count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype == + count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype) + + # Check dtypes with large sparse matrices too + X_csr.indices = X_csr.indices.astype(np.int64) + X_csr.indptr = X_csr.indptr.astype(np.int64) + assert (count_nonzero(X_csr, axis=0).dtype == + count_nonzero(X_csr, axis=1).dtype) + assert (count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype == + count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype) + def test_csc_row_median(): # Test csc_row_median actually calculates the median.