scikit-learn · OmarManzoor · Dec 2, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 23, 2023
diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
@@ -368,6 +368,10 @@ Changelog
   is a predefined metric listed in :func:`metrics.get_scorer_names` and
   early stopping is enabled. :pr:`26163` by `Thomas Fan`_.
 
+- |Fix| :class:`ensemble.IsolationForest` can now be fitted on a sparse matrix when
+  `contamination` is set to a float value.
+  :pr:`27645` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 - |API| In :class:`ensemble.AdaBoostClassifier`, the `algorithm` argument `SAMME.R` was
   deprecated and will be removed in 1.6. :pr:`26830` by :user:`Stefanie Senger
   <StefanieSenger>`.

diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py
@@ -340,7 +340,10 @@ def fit(self, X, y=None, sample_weight=None):
 
         # Else, define offset_ wrt contamination parameter
         # To avoid performing input validation a second time we call
-        # _score_samples rather than score_samples
+        # _score_samples rather than score_samples.
+        # _score_samples expects a CSR matrix, so we convert if necessary.
+        if issparse(X):
+            X = X.tocsr()
         self.offset_ = np.percentile(self._score_samples(X), 100.0 * self.contamination)
 
         return self
@@ -425,7 +428,7 @@ def score_samples(self, X):
             The lower, the more abnormal.
         """
         # Check data
-        X = self._validate_data(X, accept_sparse="csr", dtype=np.float32, reset=False)
+        X = self._validate_data(X, accept_sparse="csr", dtype=tree_dtype, reset=False)
 
         return self._score_samples(X)
 

diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py
@@ -341,3 +341,23 @@ def test_iforest_preserve_feature_names():
     with warnings.catch_warnings():
         warnings.simplefilter("error", UserWarning)
         model.fit(X)
+
+
+@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
+def test_iforest_sparse_input_float_contamination(sparse_container):
+    """Check that `IsolationForest` accepts sparse matrix input and float value for
+    contamination.
+
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/27626
+    """
+    X, _ = make_classification(n_samples=50, n_features=4, random_state=0)
+    X = sparse_container(X)
+    X.sort_indices()
+    contamination = 0.1
+    iforest = IsolationForest(
+        n_estimators=5, contamination=contamination, random_state=0
+    ).fit(X)
+
+    X_decision = iforest.decision_function(X)
+    assert (X_decision < 0).sum() / X.shape[0] == pytest.approx(contamination)