Reuse _make_int_array (with a dtype argument) in place of _make_long_array

rth · rth · commit 564f8b77e4e6 · 2017-11-22T09:38:45.000+01:00
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
@@ -784,15 +784,9 @@ def _count_vocab(self, raw_documents, fixed_vocab):
 
         analyze = self.build_analyzer()
         j_indices = []
-        if sp_version >= (0, 14):
-            # We can use 64-bit indices
-            # NOTE: long on Windows is only 32 bits
-            # indptr stores indices into j_indices, which can be large
-            indptr = _make_long_array()
-        else:
-            # Sparse arrays only support 32-bit integers
-            # j_indices stores feature indices, likely to be < 2^31
-            indptr = _make_int_array()
+        # indptr stores indices into j_indices, which can be large
+        indptr = _make_int_array(dtype='l')
+        values = _make_int_array()
         indptr.append(0)
         for doc in raw_documents:
             feature_counter = {}
@@ -970,18 +964,12 @@ def get_feature_names(self):
                                      key=itemgetter(1))]
 
 
-def _make_int_array():
-    """Construct an array.array of a type suitable for scipy.sparse indices."""
-    return array.array(str("i"))
-
-def _make_long_array():
-    """Construct an array.array of a type suitable for large scipy.sparse indices.
-
-    scipy 0.14 and later can construct sparse matrices with 64 bit integer indices.
+def _make_int_array(dtype='i'):
+    """Construct an array.array of a type suitable for scipy.sparse indices.
 
     NOTE: long on Windows is only 32 bits
     """
-    return array.array(str("l"))
+    return array.array(str(dtype))
 
 
 class TfidfTransformer(BaseEstimator, TransformerMixin):