Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion sklearn/covariance/_elliptic_envelope.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class EllipticEnvelope(OutlierMixin, MinCovDet):

contamination : float, default=0.1
The amount of contamination of the data set, i.e. the proportion
of outliers in the data set. Range is (0, 0.5).
of outliers in the data set. Range is (0, 0.5].

random_state : int, RandomState instance or None, default=None
Determines the pseudo random number generator for shuffling
Expand Down Expand Up @@ -142,6 +142,11 @@ def fit(self, X, y=None):
y : Ignored
Not used, present for API consistency by convention.
"""
if self.contamination != 'auto':
if not(0. < self.contamination <= .5):
raise ValueError("contamination must be in (0, 0.5], "
"got: %f" % self.contamination)

super().fit(X)
self.offset_ = np.percentile(-self.dist_, 100. * self.contamination)
return self
Expand Down
7 changes: 6 additions & 1 deletion sklearn/ensemble/_iforest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class IsolationForest(OutlierMixin, BaseBagging):

- If 'auto', the threshold is determined as in the
original paper.
- If float, the contamination should be in the range [0, 0.5].
- If float, the contamination should be in the range (0, 0.5].

.. versionchanged:: 0.22
The default value of ``contamination`` changed from 0.1
Expand Down Expand Up @@ -250,6 +250,11 @@ def fit(self, X, y=None, sample_weight=None):
# ensure that max_sample is in [1, n_samples]:
n_samples = X.shape[0]

if self.contamination != 'auto':
if not(0. < self.contamination <= .5):
raise ValueError("contamination must be in (0, 0.5], "
"got: %f" % self.contamination)

if isinstance(self.max_samples, str):
if self.max_samples == 'auto':
max_samples = min(256, n_samples)
Expand Down
2 changes: 1 addition & 1 deletion sklearn/neighbors/_lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class LocalOutlierFactor(KNeighborsMixin,

- if 'auto', the threshold is determined as in the
original paper,
- if a float, the contamination should be in the range [0, 0.5].
- if a float, the contamination should be in the range (0, 0.5].

.. versionchanged:: 0.22
The default value of ``contamination`` changed from 0.1
Expand Down
8 changes: 5 additions & 3 deletions sklearn/utils/estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2071,9 +2071,10 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True):
check_outlier_corruption(num_outliers, expected_outliers, decision)

# raises error when contamination is a scalar and not in [0,1]
msg = r"contamination must be in \(0, 0.5]"
for contamination in [-0.5, 2.3]:
estimator.set_params(contamination=contamination)
with raises(ValueError):
with raises(ValueError, match=msg):
estimator.fit(X)


Expand Down Expand Up @@ -2964,9 +2965,10 @@ def check_outliers_fit_predict(name, estimator_orig):
check_outlier_corruption(num_outliers, expected_outliers, decision)

# raises error when contamination is a scalar and not in [0,1]
for contamination in [-0.5, 2.3]:
msg = r"contamination must be in \(0, 0.5]"
for contamination in [-0.5, -0.001, 0.5001, 2.3]:
estimator.set_params(contamination=contamination)
with raises(ValueError):
with raises(ValueError, match=msg):
estimator.fit_predict(X)


Expand Down