Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2f2a382

Browse files
larsmans authored and amueller committed
BUG in RadiusNeighborsClassifier outlier handling
Ruggedized the test, which wasn't doing much previously, and added a test for c0d4015. Also replaced flatten (which copies) with ravel and improved the error message for outliers in the face of outlier_label=None.
1 parent 5a2b047 commit 2f2a382

File tree

2 files changed

+24
-16
lines changed

2 files changed

+24
-16
lines changed

sklearn/neighbors/classification.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -285,22 +285,24 @@ def predict(self, X):
285285
pred_labels = [self._y[ind] for ind in neigh_ind]
286286

287287
if self.outlier_label is not None:
288-
outlier_label = np.array((self.outlier_label, ))
289-
small_value = np.array((1e-6, ))
288+
outlier_label = np.array([self.outlier_label])
289+
small_value = np.array([1e-6])
290290
for i, pl in enumerate(pred_labels):
291291
# Check that all have at least 1 neighbor
292292
if len(pl) < 1:
293293
pred_labels[i] = outlier_label
294294
neigh_dist[i] = small_value
295295
else:
296-
for pl in pred_labels:
296+
for i, pl in enumerate(pred_labels):
297297
# Check that all have at least 1 neighbor
298+
# TODO we should gather all outliers, or the first k,
299+
# before constructing the error message.
298300
if len(pl) < 1:
299-
raise ValueError('no neighbors found for a test sample, '
301+
raise ValueError('No neighbors found for test sample %d, '
300302
'you can try using larger radius, '
301303
'give a label for outliers, '
302-
'or consider removing them in your '
303-
'dataset')
304+
'or consider removing it from your '
305+
'dataset.' % i)
304306

305307
weights = _get_weights(neigh_dist, self.weights)
306308

@@ -312,10 +314,10 @@ def predict(self, X):
312314
for (pl, w) in zip(pred_labels, weights)],
313315
dtype=np.int)
314316

315-
mode = mode.flatten().astype(np.int)
317+
mode = mode.ravel().astype(np.int)
316318
# map indices to classes
317319
prediction = self.classes_.take(mode)
318320
if self.outlier_label is not None:
319321
# reset outlier label
320-
prediction[mode == outlier_label] = self.outlier_label
322+
prediction[prediction == outlier_label] = self.outlier_label
321323
return prediction

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -274,14 +274,20 @@ def test_radius_neighbors_classifier_when_no_neighbors():
274274

275275
weight_func = _weight_func
276276

277-
for algorithm in ALGORITHMS:
278-
for weights in ['uniform', 'distance', weight_func]:
279-
clf = neighbors.RadiusNeighborsClassifier(radius=radius,
280-
weights=weights,
281-
algorithm=algorithm)
282-
clf.fit(X, y)
283-
clf.predict(z1)
284-
assert_raises(ValueError, clf.predict, z2)
277+
for outlier_label in [0, -1, None]:
278+
for algorithm in ALGORITHMS:
279+
for weights in ['uniform', 'distance', weight_func]:
280+
rnc = neighbors.RadiusNeighborsClassifier
281+
clf = rnc(radius=radius, weights=weights, algorithm=algorithm,
282+
outlier_label=outlier_label)
283+
clf.fit(X, y)
284+
assert_array_equal(np.array([1, 2]),
285+
clf.predict(z1))
286+
if outlier_label is None:
287+
assert_raises(ValueError, clf.predict, z2)
288+
elif False:
289+
assert_array_equal(np.array([1, outlier_label]),
290+
clf.predict(z2))
285291

286292

287293
def test_radius_neighbors_classifier_outlier_labeling():

0 commit comments

Comments
 (0)