diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index ccaa6eeb28e65..918f32e6da3e5 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -467,7 +467,8 @@ def count_nonzero(X, axis=None, sample_weight=None):
     elif axis == 1:
         out = np.diff(X.indptr)
         if sample_weight is None:
-            return out
+            # astype here is for consistency with axis=0 dtype
+            return out.astype('intp')
         return out * sample_weight
     elif axis == 0:
         if sample_weight is None:
diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py
index 838435a0deab9..03c0c717d3174 100644
--- a/sklearn/utils/tests/test_sparsefuncs.py
+++ b/sklearn/utils/tests/test_sparsefuncs.py
@@ -443,6 +443,19 @@ def test_count_nonzero():
     assert_raises(TypeError, count_nonzero, X_csc)
     assert_raises(ValueError, count_nonzero, X_csr, axis=2)
 
+    assert (count_nonzero(X_csr, axis=0).dtype ==
+            count_nonzero(X_csr, axis=1).dtype)
+    assert (count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype ==
+            count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype)
+
+    # Check dtypes with int64 indices (as in large sparse matrices) too
+    X_csr.indices = X_csr.indices.astype(np.int64)
+    X_csr.indptr = X_csr.indptr.astype(np.int64)
+    assert (count_nonzero(X_csr, axis=0).dtype ==
+            count_nonzero(X_csr, axis=1).dtype)
+    assert (count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype ==
+            count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype)
+
 
 def test_csc_row_median():
     # Test csc_row_median actually calculates the median.