From 6a56343e4c281ac1588b726acac5ad24575bff28 Mon Sep 17 00:00:00 2001 From: pierrezeb <36326642+pierrezeb@users.noreply.github.com> Date: Fri, 15 Nov 2019 19:24:28 +0100 Subject: [PATCH 1/2] Fixes divide by zero error / propagation problem In cases where none of the K neighbors of a given point has a label, the normalizer equals 0 and generates divide-by-zero errors. This creates NaNs in the label distribution and blocks label propagation. Replacing 0 values of the normalizer with 1 avoids this problem. --- sklearn/semi_supervised/_label_propagation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 0ec687aae7d20..f2009ba1ce80f 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -273,6 +273,7 @@ def fit(self, X, y): if self._variant == 'propagation': normalizer = np.sum( self.label_distributions_, axis=1)[:, np.newaxis] + normalizer[normalizer == 0 ] = 1 self.label_distributions_ /= normalizer self.label_distributions_ = np.where(unlabeled, self.label_distributions_, @@ -289,6 +290,7 @@ def fit(self, X, y): self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] + normalizer[normalizer == 0 ] = 1 self.label_distributions_ /= normalizer # set the transduction item From b4f823f2302675c7aeb178ba49f9f8305b1de9e5 Mon Sep 17 00:00:00 2001 From: pierrezeb <36326642+pierrezeb@users.noreply.github.com> Date: Fri, 15 Nov 2019 19:29:43 +0100 Subject: [PATCH 2/2] Fixes divide by zero error / propagation problem In cases where none of the K neighbors of a given point has a label, the normalizer equals 0 and generates divide-by-zero errors. This creates NaNs in the label distribution and blocks label propagation. Replacing 0 values of the normalizer with 1 avoids this problem. 
--- sklearn/semi_supervised/_label_propagation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index f2009ba1ce80f..eb6657fc4d234 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -273,7 +273,7 @@ def fit(self, X, y): if self._variant == 'propagation': normalizer = np.sum( self.label_distributions_, axis=1)[:, np.newaxis] - normalizer[normalizer == 0 ] = 1 + normalizer[normalizer == 0] = 1 self.label_distributions_ /= normalizer self.label_distributions_ = np.where(unlabeled, self.label_distributions_, @@ -290,7 +290,7 @@ def fit(self, X, y): self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] - normalizer[normalizer == 0 ] = 1 + normalizer[normalizer == 0] = 1 self.label_distributions_ /= normalizer # set the transduction item