From 2fece8fb3d08c6ad44e78cf5ad8b0a50ebeb40a4 Mon Sep 17 00:00:00 2001 From: Paulo Date: Thu, 16 Jun 2022 12:54:29 -0300 Subject: [PATCH 1/7] docstring fix --- sklearn/datasets/_lfw.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index dc1267af59f96..be01ae6279e27 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -464,7 +464,7 @@ def fetch_lfw_pairs( slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172)) Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical - correlation from the background + correlation from the background. download_if_missing : bool, default=True If False, raise a IOError if the data is not locally available @@ -491,7 +491,6 @@ def fetch_lfw_pairs( The two label values being different persons or the same person. DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. - """ lfw_home, data_folder_path = _check_fetch_lfw( data_home=data_home, funneled=funneled, download_if_missing=download_if_missing From 1974c24aa1886f7055f42a1619d8b0a9d2b91c0f Mon Sep 17 00:00:00 2001 From: Paulo Date: Thu, 16 Jun 2022 13:27:54 -0300 Subject: [PATCH 2/7] log loss func doc --- sklearn/metrics/_classification.py | 12 +++++++----- sklearn/tests/test_docstrings.py | 2 -- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index a1fb638bb29db..e17c53a33f1a4 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2545,22 +2545,24 @@ def log_loss( Returns ------- loss : float + Raw Log Loss value. This is the value calculated using the + ``y_pred`` and ``y_true`` based on the abover formula. Notes ----- The logarithm used is the natural logarithm (base-e). + References + ---------- + C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer, + p. 209. + Examples -------- >>> from sklearn.metrics import log_loss >>> log_loss(["spam", "ham", "ham", "spam"], ... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]]) 0.21616... - - References - ---------- - C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer, - p. 209. """ y_pred = check_array(y_pred, ensure_2d=False) check_consistent_length(y_pred, y_true, sample_weight) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index cc5883f3acc4b..1489dd5c6da72 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -13,7 +13,6 @@ FUNCTION_DOCSTRING_IGNORE_LIST = [ "sklearn.datasets._kddcup99.fetch_kddcup99", - "sklearn.datasets._lfw.fetch_lfw_pairs", "sklearn.datasets._lfw.fetch_lfw_people", "sklearn.datasets._samples_generator.make_gaussian_quantiles", "sklearn.datasets._samples_generator.make_spd_matrix", @@ -35,7 +34,6 @@ "sklearn.metrics._classification.brier_score_loss", "sklearn.metrics._classification.cohen_kappa_score", "sklearn.metrics._classification.jaccard_score", - "sklearn.metrics._classification.log_loss", "sklearn.metrics._plot.det_curve.plot_det_curve", "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve", "sklearn.metrics._ranking.coverage_error", From d39296f78bd162ee47b7194cd58b0fe544c38d91 Mon Sep 17 00:00:00 2001 From: Paulo Date: Thu, 16 Jun 2022 13:46:36 -0300 Subject: [PATCH 3/7] list fix --- sklearn/tests/test_docstrings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index 1489dd5c6da72..36ffe78ecfb14 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -13,6 +13,7 @@ FUNCTION_DOCSTRING_IGNORE_LIST = [ "sklearn.datasets._kddcup99.fetch_kddcup99", + "sklearn.datasets._lfw.fetch_lfw_pairs", "sklearn.datasets._lfw.fetch_lfw_people", "sklearn.datasets._samples_generator.make_gaussian_quantiles", "sklearn.datasets._samples_generator.make_spd_matrix", From ba0462959a48d7bd46ed932232142526492a1a8c Mon Sep 17 00:00:00 2001 From: Paulo Sergio Soares <56484955+paulo-smcs@users.noreply.github.com> Date: Tue, 21 Jun 2022 18:08:56 -0300 Subject: [PATCH 4/7] Update sklearn/metrics/_classification.py Co-authored-by: Thomas J. Fan --- sklearn/metrics/_classification.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index e17c53a33f1a4..ff591ba0c85c5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2545,8 +2545,7 @@ def log_loss( Returns ------- loss : float - Raw Log Loss value. This is the value calculated using the - ``y_pred`` and ``y_true`` based on the abover formula. + Log loss, aka logistic loss or cross-entropy loss. Notes ----- From 8c44175c733277a83460928c83b08cafa7914da0 Mon Sep 17 00:00:00 2001 From: Paulo Sergio Soares <56484955+paulo-smcs@users.noreply.github.com> Date: Tue, 21 Jun 2022 18:10:10 -0300 Subject: [PATCH 5/7] Update _lfw.py --- sklearn/datasets/_lfw.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index be01ae6279e27..24a269f4f1493 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -464,7 +464,7 @@ def fetch_lfw_pairs( slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172)) Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical - correlation from the background. + correlation from the background download_if_missing : bool, default=True If False, raise a IOError if the data is not locally available @@ -491,6 +491,7 @@ def fetch_lfw_pairs( The two label values being different persons or the same person. DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. + """ lfw_home, data_folder_path = _check_fetch_lfw( data_home=data_home, funneled=funneled, download_if_missing=download_if_missing From 735ec542cf7da10691f990bef720fb16eb120010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 22 Jun 2022 13:43:36 +0200 Subject: [PATCH 6/7] Update sklearn/datasets/_lfw.py --- sklearn/datasets/_lfw.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index 24a269f4f1493..f220754b4d282 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -491,7 +491,6 @@ def fetch_lfw_pairs( The two label values being different persons or the same person. DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. - """ lfw_home, data_folder_path = _check_fetch_lfw( data_home=data_home, funneled=funneled, download_if_missing=download_if_missing From 887d04580758f81346b5c959ddfc4e5aea1181e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 22 Jun 2022 13:45:05 +0200 Subject: [PATCH 7/7] Update _lfw.py --- sklearn/datasets/_lfw.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index f220754b4d282..dc1267af59f96 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -491,6 +491,7 @@ def fetch_lfw_pairs( The two label values being different persons or the same person. DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. + """ lfw_home, data_folder_path = _check_fetch_lfw( data_home=data_home, funneled=funneled, download_if_missing=download_if_missing