@@ -112,30 +112,45 @@ def test_hasher_zeros():
112
112
113
113
@ignore_warnings(category=DeprecationWarning)
def test_hasher_alternate_sign():
    """Check the sign of the hashed values for each combination of the
    ``alternate_sign`` and ``non_negative`` options on string tokens."""
    X = [list("Thequickbrownfoxjumped")]

    # alternate_sign=True, non_negative=False: both negative and positive
    # values are expected in the output.
    Xt = FeatureHasher(alternate_sign=True, non_negative=False,
                       input_type='string').fit_transform(X)
    assert Xt.data.min() < 0 and Xt.data.max() > 0

    # alternate_sign=True, non_negative=True: every stored value must be
    # strictly positive.
    Xt = FeatureHasher(alternate_sign=True, non_negative=True,
                       input_type='string').fit_transform(X)
    assert Xt.data.min() > 0

    # alternate_sign=False, non_negative=True: strictly positive counts.
    Xt = FeatureHasher(alternate_sign=False, non_negative=True,
                       input_type='string').fit_transform(X)
    assert Xt.data.min() > 0
    Xt_2 = FeatureHasher(alternate_sign=False, non_negative=False,
                         input_type='string').fit_transform(X)
    # With initially positive features, the non_negative option should
    # have no impact when alternate_sign=False
    assert_array_equal(Xt.data, Xt_2.data)
137
133
138
134
135
@ignore_warnings(category=DeprecationWarning)
def test_hash_collisions():
    """Check the magnitude of the hashed value when ``n_features=1``, so
    that all tokens of the sample land in the same output feature."""
    X = [list("Thequickbrownfoxjumped")]

    Xt = FeatureHasher(alternate_sign=True, non_negative=False,
                       n_features=1, input_type='string').fit_transform(X)
    # check that some of the hashed tokens are added
    # with an opposite sign and cancel out
    assert abs(Xt.data[0]) < len(X[0])

    # Same expectation when non_negative=True is requested together with
    # alternate_sign=True: the magnitude stays below the token count.
    Xt = FeatureHasher(alternate_sign=True, non_negative=True,
                       n_features=1, input_type='string').fit_transform(X)
    assert abs(Xt.data[0]) < len(X[0])

    # With alternate_sign=False the single value equals the number of tokens.
    Xt = FeatureHasher(alternate_sign=False, non_negative=True,
                       n_features=1, input_type='string').fit_transform(X)
    assert Xt.data[0] == len(X[0])
152
+
153
+
139
154
@ignore_warnings (category = DeprecationWarning )
140
155
def test_hasher_negative ():
141
156
X = [{"foo" : 2 , "bar" : - 4 , "baz" : - 1 }.items ()]
0 commit comments