@@ -784,15 +784,9 @@ def _count_vocab(self, raw_documents, fixed_vocab):
784
784
785
785
analyze = self .build_analyzer ()
786
786
j_indices = []
787
- if sp_version >= (0 , 14 ):
788
- # We can use 64-bit indices
789
- # NOTE: long on Windows is only 32 bits
790
- # indptr stores indices into j_indices, which can be large
791
- indptr = _make_long_array ()
792
- else :
793
- # Sparse arrays only support 32-bit integers
794
- # j_indices stores feature indices, likely to be < 2^31
795
- indptr = _make_int_array ()
787
+ # indptr stores indices into j_indices, which can be large, so request the 'l' (long) typecode; NOTE: long is only 32 bits on Windows
788
+ indptr = _make_int_array (dtype = 'l' )
789
+ values = _make_int_array ()
796
790
indptr .append (0 )
797
791
for doc in raw_documents :
798
792
feature_counter = {}
@@ -970,18 +964,12 @@ def get_feature_names(self):
970
964
key = itemgetter (1 ))]
971
965
972
966
973
- def _make_int_array ():
974
- """Construct an array.array of a type suitable for scipy.sparse indices."""
975
- return array .array (str ("i" ))
976
-
977
- def _make_long_array ():
978
- """Construct an array.array of a type suitable for large scipy.sparse indices.
979
-
980
- scipy 0.14 and later can construct sparse matrices with 64 bit integer indices.
967
+ def _make_int_array (dtype = 'i' ):
968
+ """Construct an array.array of a type suitable for scipy.sparse indices.
981
969
982
970
NOTE: the 'l' (long) typecode is only 32 bits on Windows
983
971
"""
984
- return array .array (str ("l" ))
972
+ return array .array (str (dtype ))
985
973
986
974
987
975
class TfidfTransformer (BaseEstimator , TransformerMixin ):
0 commit comments