From c7e6fed661e745eb775d89d83f6d69941ddadccc Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Sun, 12 Jan 2020 11:17:47 +0530 Subject: [PATCH 01/19] Multioutput support for RFE Check the type of the target variable; if the target is multioutput, pass multi_output=True to check_X_y --- sklearn/feature_selection/_rfe.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 12e99175c9d61..7320d8d0c0930 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -13,6 +13,7 @@ from ..utils.metaestimators import if_delegate_has_method from ..utils.metaestimators import _safe_split from ..utils.validation import check_is_fitted +from ..utils.multiclass import type_of_target from ..base import BaseEstimator from ..base import MetaEstimatorMixin from ..base import clone @@ -155,8 +156,13 @@ def _fit(self, X, y, step_score=None): # self.scores_ will not be calculated when calling _fit through fit tags = self._get_tags() + type_y = type_of_target(y) + + multioutput = 'multioutput' in type_y + + X, y = check_X_y(X, y, "csc", ensure_min_features=2, - force_all_finite=not tags.get('allow_nan', True)) + force_all_finite=not tags.get('allow_nan', True), multi_output=multioutput) # Initialization n_features = X.shape[1] if self.n_features_to_select is None: From b0154c4184ace48569d2722ff790c68e7b3cf8b9 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Sun, 12 Jan 2020 13:03:25 +0530 Subject: [PATCH 02/19] Multi-output support for RFE --- sklearn/feature_selection/_rfe.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 7320d8d0c0930..920296f8d88c9 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -157,10 +157,7 @@ def _fit(self, X, y, step_score=None): tags = self._get_tags() type_y = type_of_target(y) - multioutput 
= 'multioutput' in type_y - - X, y = check_X_y(X, y, "csc", ensure_min_features=2, force_all_finite=not tags.get('allow_nan', True), multi_output=multioutput) # Initialization From d91a562e66ab716ad2cc19d187c4d07be59f7d61 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Sun, 12 Jan 2020 16:47:53 +0530 Subject: [PATCH 03/19] [WIP] Multi-output support for RFE --- sklearn/feature_selection/_rfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 920296f8d88c9..5aaf6eed31f15 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -157,9 +157,9 @@ def _fit(self, X, y, step_score=None): tags = self._get_tags() type_y = type_of_target(y) - multioutput = 'multioutput' in type_y + flag = 'multioutput' in type_y X, y = check_X_y(X, y, "csc", ensure_min_features=2, - force_all_finite=not tags.get('allow_nan', True), multi_output=multioutput) + force_all_finite=not tags.get('allow_nan', True), multi_output=flag) # Initialization n_features = X.shape[1] if self.n_features_to_select is None: From ee73f6621ee039ffb68913fa29c60f62f621da0b Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Sun, 12 Jan 2020 16:54:18 +0530 Subject: [PATCH 04/19] [WIP] Multi-output support for RFE --- sklearn/feature_selection/_rfe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 5aaf6eed31f15..bf49f57bfa5ea 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -157,9 +157,10 @@ def _fit(self, X, y, step_score=None): tags = self._get_tags() type_y = type_of_target(y) - flag = 'multioutput' in type_y + multioutput = 'multioutput' in type_y X, y = check_X_y(X, y, "csc", ensure_min_features=2, - force_all_finite=not tags.get('allow_nan', True), multi_output=flag) + force_all_finite=not tags.get('allow_nan', True), + 
multi_output=multioutput) # Initialization n_features = X.shape[1] if self.n_features_to_select is None: From 2319786ea8e8d83448b0050204a0860c995e354f Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Sun, 12 Jan 2020 16:58:41 +0530 Subject: [PATCH 05/19] [WIP] Multioutput support for RFE --- sklearn/feature_selection/_rfe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index bf49f57bfa5ea..fed0183b85264 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -159,7 +159,7 @@ def _fit(self, X, y, step_score=None): type_y = type_of_target(y) multioutput = 'multioutput' in type_y X, y = check_X_y(X, y, "csc", ensure_min_features=2, - force_all_finite=not tags.get('allow_nan', True), + force_all_finite=not tags.get('allow_nan', True), multi_output=multioutput) # Initialization n_features = X.shape[1] From 797500ae8dccec8aef3681f37419dd8e4d34c0af Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 12:11:20 +0530 Subject: [PATCH 06/19] ENH add multioutput support for RFE --- sklearn/feature_selection/_rfe.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index fed0183b85264..7e874f74cb550 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -493,8 +493,12 @@ def fit(self, X, y, groups=None): train/test set. Only used in conjunction with a "Group" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). 
""" - X, y = check_X_y(X, y, "csr", ensure_min_features=2, - force_all_finite=False) + tags = self._get_tags() + type_y = type_of_target(y) + multioutput = 'multioutput' in type_y + X, y = check_X_y(X, y, "csc", ensure_min_features=2, + force_all_finite=not tags.get('allow_nan', True), + multi_output=multioutput) # Initialization cv = check_cv(self.cv, y, is_classifier(self.estimator)) From 9d344d2cad6950480508f18862d66521fd116127 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:10:10 +0530 Subject: [PATCH 07/19] ENH add multioutput support for RFE #16103 --- sklearn/feature_selection/tests/test_rfe.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index ccd3c0a1b0e83..c1e566fdba229 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -395,3 +395,12 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe = RFE(estimator=clf) rfe.fit(X, y) rfe.transform(X) + +def test_multioutput(): + X = np.array([[1,2,3],[2,3,4],[3,4,5]]) + # create y with more than 1 column + y = np.array([[1,1],[1,0],[1,0]]) + clf = RandomForestClassifier(n_estimators=5 + ,max_depth=2) + rfe_test = RFE(clf) + rfe_test.fit(X,y) From 9a9e88341fdaf4d99ee5a4b0adab0eda2361658a Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:16:03 +0530 Subject: [PATCH 08/19] ENH add multioutput support for RFE #16103 --- sklearn/feature_selection/tests/test_rfe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index c1e566fdba229..9e1c0bfd93960 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -396,11 +396,11 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.fit(X, y) rfe.transform(X) + def test_multioutput(): - X = np.array([[1,2,3],[2,3,4],[3,4,5]]) + X = 
np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) # create y with more than 1 column - y = np.array([[1,1],[1,0],[1,0]]) - clf = RandomForestClassifier(n_estimators=5 - ,max_depth=2) + y = np.array([[1, 1], [1, 0], [1, 0]]) + clf = RandomForestClassifier(n_estimators=5) rfe_test = RFE(clf) rfe_test.fit(X,y) From 7d30314a386079f892a0fc00e96241a359aec7d7 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:22:16 +0530 Subject: [PATCH 09/19] ENH add multioutput support for RFE #16103 --- sklearn/feature_selection/tests/test_rfe.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 9e1c0bfd93960..c39b93147babf 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -396,11 +396,10 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.fit(X, y) rfe.transform(X) - def test_multioutput(): - X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) - # create y with more than 1 column - y = np.array([[1, 1], [1, 0], [1, 0]]) - clf = RandomForestClassifier(n_estimators=5) - rfe_test = RFE(clf) - rfe_test.fit(X,y) + X = np.array([[1, 2, 3], [2, 3, 4],[3, 4, 5]]) + # create y with more than 1 column + y = np.array([[1, 1], [1, 0], [1, 0]]) + clf = RandomForestClassifier(n_estimators=5) + rfe_test = RFE(clf) + rfe_test.fit(X,y) From 5fc6e8618a3d144d7bcebcc9b397ed13256ebd2e Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:29:00 +0530 Subject: [PATCH 10/19] ENH add multioutput support for RFE --- sklearn/feature_selection/_rfe.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 7e874f74cb550..59ae071a98b87 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -156,11 +156,9 @@ def _fit(self, X, y, step_score=None): # self.scores_ will not be calculated 
when calling _fit through fit tags = self._get_tags() - type_y = type_of_target(y) - multioutput = 'multioutput' in type_y X, y = check_X_y(X, y, "csc", ensure_min_features=2, force_all_finite=not tags.get('allow_nan', True), - multi_output=multioutput) + multi_output=True) # Initialization n_features = X.shape[1] if self.n_features_to_select is None: @@ -494,11 +492,9 @@ def fit(self, X, y, groups=None): instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). """ tags = self._get_tags() - type_y = type_of_target(y) - multioutput = 'multioutput' in type_y X, y = check_X_y(X, y, "csc", ensure_min_features=2, force_all_finite=not tags.get('allow_nan', True), - multi_output=multioutput) + multi_output=True) # Initialization cv = check_cv(self.cv, y, is_classifier(self.estimator)) From eb432cb02b0714211d19339dc0b1388cd9c10d08 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:30:45 +0530 Subject: [PATCH 11/19] ENH add multioutput support for RFE --- sklearn/feature_selection/tests/test_rfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index c39b93147babf..3983b258cc707 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -397,9 +397,9 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.transform(X) def test_multioutput(): - X = np.array([[1, 2, 3], [2, 3, 4],[3, 4, 5]]) + X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) # create y with more than 1 column y = np.array([[1, 1], [1, 0], [1, 0]]) clf = RandomForestClassifier(n_estimators=5) rfe_test = RFE(clf) - rfe_test.fit(X,y) + rfe_test.fit(X, y) From d123fd80e0b2d3a2ec859ecc0754dc7c3b751eb8 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:32:43 +0530 Subject: [PATCH 12/19] ENH add multioutput support for RFE --- sklearn/feature_selection/tests/test_rfe.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 3983b258cc707..26725f3440f82 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -396,6 +396,7 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.fit(X, y) rfe.transform(X) + def test_multioutput(): X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) # create y with more than 1 column From ec7b6327de25dbad2f747537ce61edb402a31d49 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:34:30 +0530 Subject: [PATCH 13/19] ENH add multioutput support for RFE --- sklearn/feature_selection/tests/test_rfe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 26725f3440f82..65b67444a6e68 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -396,7 +396,7 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.fit(X, y) rfe.transform(X) - + def test_multioutput(): X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) # create y with more than 1 column From 4d7ee07785f250ad7294d2a28de2a09ac9ec5bb8 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 14 Jan 2020 16:35:51 +0530 Subject: [PATCH 14/19] ENH add multioutput support for RFE --- sklearn/feature_selection/_rfe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 59ae071a98b87..91312c7dc80f9 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -13,7 +13,6 @@ from ..utils.metaestimators import if_delegate_has_method from ..utils.metaestimators import _safe_split from ..utils.validation import check_is_fitted -from ..utils.multiclass import type_of_target from ..base import BaseEstimator from ..base import MetaEstimatorMixin from ..base import clone From 8107f8d935733b10aeeac06d42e250ddfae308c4 Mon Sep 
17 00:00:00 2001 From: Divyaprabha M Date: Tue, 4 Feb 2020 17:03:21 +0530 Subject: [PATCH 15/19] ENH add multioutput support for RFE (#16103) --- doc/whats_new/v0.23.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 1941aacb7a7b0..c0d80d0ad6495 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -127,3 +127,12 @@ Changelog - |Enhancement| improve error message in :func:`utils.validation.column_or_1d`. :pr:`15926` by :user:`Loïc Estève `. + +:mod:`sklearn.feature_selection` +................................. + +- |Enhancement| In :class:`feature_selection.RFE` and + :class:`feature_selection.RFECV` added support for multioutput by setting + the argument multi_output=TRUE. + :pr:`16103` by :user:`Divyaprabha M `. + From 88446b5a11ad3b6dee4bfd6dc668bcad6a4915ab Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 4 Feb 2020 17:58:39 +0530 Subject: [PATCH 16/19] ENH add multioutput support for RFE #16103 --- sklearn/feature_selection/tests/test_rfe.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 65b67444a6e68..654675e677a11 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -397,10 +397,13 @@ def test_rfe_allow_nan_inf_in_x(cv): rfe.transform(X) -def test_multioutput(): - X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) - # create y with more than 1 column - y = np.array([[1, 1], [1, 0], [1, 0]]) +@pytest.mark.parametrize('ClsRFE', [ + RFE, + RFECV + ]) +def test_multioutput(ClsRFE): + X = np.random.normal(size=(10, 3)) + y = np.random.randint(2, size=(10, 2)) clf = RandomForestClassifier(n_estimators=5) - rfe_test = RFE(clf) + rfe_test = ClsRFE(clf) rfe_test.fit(X, y) From 38b3cbcacb373aead958dfcf108bd26d0b2a5b50 Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 4 Feb 2020 18:40:48 +0530 
Subject: [PATCH 17/19] ENH add multioutput support for RFE #16103 --- doc/whats_new/v0.23.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index c0d80d0ad6495..4064560c234ad 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -131,8 +131,6 @@ Changelog :mod:`sklearn.feature_selection` ................................. -- |Enhancement| In :class:`feature_selection.RFE` and - :class:`feature_selection.RFECV` added support for multioutput by setting - the argument multi_output=TRUE. - :pr:`16103` by :user:`Divyaprabha M `. - +- |Enhancement| Added support for multioutput data in :class:`feature_selection.RFE` + and :class:`feature_selection.RFECV`. + :pr:`16103` by :user:`Divyaprabha `. From 308dceaf66ffb1a953363e2a08b923fd3476b12b Mon Sep 17 00:00:00 2001 From: Divyaprabha M Date: Tue, 4 Feb 2020 19:52:19 +0530 Subject: [PATCH 18/19] ENH add multioutput support for RFE #16103 --- doc/whats_new/v0.23.rst | 133 ++++------------------------------------ 1 file changed, 11 insertions(+), 122 deletions(-) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index bc93f078ebb39..70b6043924937 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -23,7 +23,7 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. -- list models here +- models come here Details are listed in the changelog below. @@ -51,28 +51,9 @@ Changelog more memory efficient implementation of single linkage clustering. :pr:`11514` by :user:`Leland McInnes `. -- |Efficiency| :class:`cluster.Birch` implementation of the predict method - avoids high memory footprint by calculating the distances matrix using - a chunked scheme. - :pr:`16149` by :user:`Jeremie du Boisberranger ` and - :user:`Alex Shacked `. - -:mod:`sklearn.compose` -...................... 
- -- |Fix| :class:`compose.ColumnTransformer` method ``get_feature_names`` now - returns correct results when one of the transformer steps applies on an - empty list of columns :pr:`15963` by `Roman Yurchak`_. - :mod:`sklearn.datasets` ....................... -- |Enhancement| Added ``return_centers`` parameter in - :func:`datasets.make_blobs`, which can be used to return - centers for each cluster. - :pr:`15709` by :user:`` and - :user:`Venkatachalam N `. - - |Enhancement| Functions :func:`datasets.make_circles` and :func:`datasets.make_moons` now accept two-element tuple. :pr:`15707` by :user:`Maciej J Mikulski `. @@ -82,40 +63,6 @@ Changelog by :user:`Stephanie Andrews ` and :user:`Reshama Shaikh `. -- |Feature| embedded dataset loaders :func:`load_breast_cancer`, - :func:`load_diabetes`, :func:`load_digits`, :func:`load_iris`, - :func:`load_linnerud` and :func:`load_wine` now support loading as a pandas - ``DataFrame`` by setting `as_frame=True`. :pr:`15980` by :user:`wconnell` and - :user:`Reshama Shaikh `. - -- |Fix| :func:`datasets.make_multilabel_classification` now generates - `ValueError` for arguments `n_classes < 1` OR `length < 1`. - :pr:`16006` by :user:`Rushabh Vasani `. - -:mod:`sklearn.decomposition` -............................ - -- |Fix| :class:`decomposition.PCA` with a float `n_components` parameter, will - exclusively choose the components that explain the variance greater than - `n_components`. :pr:`15669` by :user:`Krishna Chaitanya ` - -:mod:`sklearn.ensemble` -....................... - -- |API| Added boolean `verbose` flag to classes: - :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`. - :pr:`15991` by :user:`Sam Bail `, - :user:`Hanna Bruce MacDonald `, - :user:`Reshama Shaikh `, and - :user:`Chiara Marmo `. - -- |Fix| Changed the convention for `max_depth` parameter of - :class:`ensemble.HistGradientBoostingClassifier` and - :class:`ensemble.HistGradientBoostingRegressor`. 
The depth now corresponds to - the number of edges to go from the root to the deepest leaf. - Stumps (trees with one split) are now allowed. - :pr: `16182` by :user:`Santhosh B ` - :mod:`sklearn.feature_extraction` ................................. @@ -128,7 +75,7 @@ Changelog ............................... - |Enhancement| :func:`gaussian_process.kernels.Matern` returns the RBF kernel when ``nu=np.inf``. - :pr:`15503` by :user:`Sam Dixon `. + :pr: `15503` by :user:`Sam Dixon` . :mod:`sklearn.linear_model` ........................... @@ -149,26 +96,6 @@ Changelog :class:`linear_model.RidgeClassifierCV`. :pr:`15653` by :user:`Jérôme Dockès `. -- |Fix| Fixed a bug in :class:`linear_model.RidgeClassifierCV` to pass a - specific scoring strategy. Before the internal estimator outputs score - instead of predictions. - :pr:`14848` by :user:`Venkatachalam N `. - -- |Fix| :class:`linear_model.LogisticRegression` will now avoid an unnecessary - iteration when `solver='newton-cg'` by checking for inferior or equal instead - of strictly inferior for maximum of `absgrad` and `tol` in `utils.optimize._newton_cg`. - :pr:`16266` by :user:`Rushabh Vasani `. - -:mod:`sklearn.metrics` -...................... - -- |Fix| Fixed a bug in :func:`metrics.mean_squared_error` to not ignore - argument `squared` when argument `multioutput='raw_values'`. - :pr:`16323` by :user:`Rushabh Vasani ` - -- |Fix| Fixed a bug in :func:`metrics.mutual_info_score` where negative - scores could be returned. :pr:`16362` by `Thomas Fan`_. - :mod:`sklearn.model_selection` .............................. @@ -180,31 +107,7 @@ Changelog - |Fix| :func: `cross_val_predict` supports `method="predict_proba"` when `y=None`. - :pr:`15918` by :user:`Luca Kubin `. - -:mod:`sklearn.naive_bayes` -............................. - -- |Fix| A correctly formatted error message is shown in - :class:`naive_bayes.CategoricalNB` when the number of features in the input - differs between `predict` and `fit`. 
- :pr:`16090` by :user:`Madhura Jayaratne `. - -:mod:`sklearn.neighbors` -.............................. - -- |Fix| Fix a bug which converted a list of arrays into a 2-D object - array instead of a 1-D array containing NumPy arrays. This bug - was affecting :meth:`neighbors.NearestNeighbors.radius_neighbors`. - :pr:`16076` by :user:`Guillaume Lemaitre ` and - :user:`Alex Shacked `. - -:mod:`sklearn.neural_network` -............................. - -- |Fix| Increases the numerical stability of the logistic loss function in - :class:`neural_network.MLPClassifier` by clipping the probabilities. - :pr:`16117` by `Thomas Fan`_. + :pr: `15918` by :user: `Luca Kubin `. :mod:`sklearn.preprocessing` ............................ @@ -212,19 +115,6 @@ Changelog - |Efficiency| :class:`preprocessing.OneHotEncoder` is now faster at transforming. :pr:`15762` by `Thomas Fan`_. -:mod:`sklearn.svm` -.................. - -- |API| :class:`svm.SVR` and :class:`svm.OneClassSVM` attributes, `probA_` and - `probB_`, are now deprecated as they were not useful. :pr:`15558` by - `Thomas Fan`_. - -- |Fix| Fix use of custom kernel not taking float entries such as string - kernels in :class:`svm.SVC` and :class:`svm.SVR`. Note that custom kennels - are now expected to validate their input where they previously received - valid numeric arrays. - :pr:`11296` by `Alexandre Gramfort`_ and :user:`Georgi Peev `. - :mod:`sklearn.tree` ................... @@ -232,16 +122,15 @@ Changelog deprecated. :pr:`15806` by :user:`Chiara Marmo `. -- |Fix| Fix support of read-only float32 array input in ``predict``, - ``decision_path`` and ``predict_proba`` methods of - :class:`tree.DecisionTreeClassifier`, :class:`tree.ExtraTreeClassifier` and - :class:`ensemble.GradientBoostingClassifier` as well as ``predict`` method of - :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeRegressor`, and - :class:`ensemble.GradientBoostingRegressor`. - :pr:`16331` by :user:`Alexandre Batisse `. 
- :mod:`sklearn.utils` .................... - |Enhancement| improve error message in :func:`utils.validation.column_or_1d`. - :pr:`15926` by :user:`Loïc Estève `. \ No newline at end of file + :pr:`15926` by :user:`Loïc Estève `. + +:mod:`sklearn.feature_selection` +................................ + +- |Enhancement| Added support for multioutput data in :class:`feature_selection.RFE` + and :class:`feature_selection.RFECV`. + :pr:`16103` by :user:`Divyaprabha M `. From 148425084302cdaf40c7848940009508040812c8 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 4 Feb 2020 19:20:42 +0100 Subject: [PATCH 19/19] DOC fix merge conflict --- doc/whats_new/v0.23.rst | 138 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 9 deletions(-) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 70b6043924937..32716cb9eb694 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -23,7 +23,7 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. -- models come here +- list models here Details are listed in the changelog below. @@ -51,9 +51,28 @@ Changelog more memory efficient implementation of single linkage clustering. :pr:`11514` by :user:`Leland McInnes `. +- |Efficiency| :class:`cluster.Birch` implementation of the predict method + avoids high memory footprint by calculating the distances matrix using + a chunked scheme. + :pr:`16149` by :user:`Jeremie du Boisberranger ` and + :user:`Alex Shacked `. + +:mod:`sklearn.compose` +...................... + +- |Fix| :class:`compose.ColumnTransformer` method ``get_feature_names`` now + returns correct results when one of the transformer steps applies on an + empty list of columns :pr:`15963` by `Roman Yurchak`_. + :mod:`sklearn.datasets` ....................... 
+- |Enhancement| Added ``return_centers`` parameter in + :func:`datasets.make_blobs`, which can be used to return + centers for each cluster. + :pr:`15709` by :user:`` and + :user:`Venkatachalam N `. + - |Enhancement| Functions :func:`datasets.make_circles` and :func:`datasets.make_moons` now accept two-element tuple. :pr:`15707` by :user:`Maciej J Mikulski `. @@ -63,6 +82,40 @@ Changelog by :user:`Stephanie Andrews ` and :user:`Reshama Shaikh `. +- |Feature| embedded dataset loaders :func:`load_breast_cancer`, + :func:`load_diabetes`, :func:`load_digits`, :func:`load_iris`, + :func:`load_linnerud` and :func:`load_wine` now support loading as a pandas + ``DataFrame`` by setting `as_frame=True`. :pr:`15980` by :user:`wconnell` and + :user:`Reshama Shaikh `. + +- |Fix| :func:`datasets.make_multilabel_classification` now generates + `ValueError` for arguments `n_classes < 1` OR `length < 1`. + :pr:`16006` by :user:`Rushabh Vasani `. + +:mod:`sklearn.decomposition` +............................ + +- |Fix| :class:`decomposition.PCA` with a float `n_components` parameter, will + exclusively choose the components that explain the variance greater than + `n_components`. :pr:`15669` by :user:`Krishna Chaitanya ` + +:mod:`sklearn.ensemble` +....................... + +- |API| Added boolean `verbose` flag to classes: + :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`. + :pr:`15991` by :user:`Sam Bail `, + :user:`Hanna Bruce MacDonald `, + :user:`Reshama Shaikh `, and + :user:`Chiara Marmo `. + +- |Fix| Changed the convention for `max_depth` parameter of + :class:`ensemble.HistGradientBoostingClassifier` and + :class:`ensemble.HistGradientBoostingRegressor`. The depth now corresponds to + the number of edges to go from the root to the deepest leaf. + Stumps (trees with one split) are now allowed. + :pr: `16182` by :user:`Santhosh B ` + :mod:`sklearn.feature_extraction` ................................. 
@@ -71,11 +124,16 @@ Changelog for datasets with large vocabularies combined with ``min_df`` or ``max_df``. :pr:`15834` by :user:`Santiago M. Mola `. + +- |Enhancement| Added support for multioutput data in + :class:`feature_selection.RFE` and :class:`feature_selection.RFECV`. + :pr:`16103` by :user:`Divyaprabha M `. + :mod:`sklearn.gaussian_process` ............................... - |Enhancement| :func:`gaussian_process.kernels.Matern` returns the RBF kernel when ``nu=np.inf``. - :pr: `15503` by :user:`Sam Dixon` . + :pr:`15503` by :user:`Sam Dixon `. :mod:`sklearn.linear_model` ........................... @@ -96,6 +154,26 @@ Changelog :class:`linear_model.RidgeClassifierCV`. :pr:`15653` by :user:`Jérôme Dockès `. +- |Fix| Fixed a bug in :class:`linear_model.RidgeClassifierCV` to pass a + specific scoring strategy. Before the internal estimator outputs score + instead of predictions. + :pr:`14848` by :user:`Venkatachalam N `. + +- |Fix| :class:`linear_model.LogisticRegression` will now avoid an unnecessary + iteration when `solver='newton-cg'` by checking for inferior or equal instead + of strictly inferior for maximum of `absgrad` and `tol` in `utils.optimize._newton_cg`. + :pr:`16266` by :user:`Rushabh Vasani `. + +:mod:`sklearn.metrics` +...................... + +- |Fix| Fixed a bug in :func:`metrics.mean_squared_error` to not ignore + argument `squared` when argument `multioutput='raw_values'`. + :pr:`16323` by :user:`Rushabh Vasani ` + +- |Fix| Fixed a bug in :func:`metrics.mutual_info_score` where negative + scores could be returned. :pr:`16362` by `Thomas Fan`_. + :mod:`sklearn.model_selection` .............................. @@ -107,7 +185,31 @@ Changelog - |Fix| :func: `cross_val_predict` supports `method="predict_proba"` when `y=None`. - :pr: `15918` by :user: `Luca Kubin `. + :pr:`15918` by :user:`Luca Kubin `. + +:mod:`sklearn.naive_bayes` +............................. 
+ +- |Fix| A correctly formatted error message is shown in + :class:`naive_bayes.CategoricalNB` when the number of features in the input + differs between `predict` and `fit`. + :pr:`16090` by :user:`Madhura Jayaratne `. + +:mod:`sklearn.neighbors` +.............................. + +- |Fix| Fix a bug which converted a list of arrays into a 2-D object + array instead of a 1-D array containing NumPy arrays. This bug + was affecting :meth:`neighbors.NearestNeighbors.radius_neighbors`. + :pr:`16076` by :user:`Guillaume Lemaitre ` and + :user:`Alex Shacked `. + +:mod:`sklearn.neural_network` +............................. + +- |Fix| Increases the numerical stability of the logistic loss function in + :class:`neural_network.MLPClassifier` by clipping the probabilities. + :pr:`16117` by `Thomas Fan`_. :mod:`sklearn.preprocessing` ............................ @@ -115,6 +217,19 @@ Changelog - |Efficiency| :class:`preprocessing.OneHotEncoder` is now faster at transforming. :pr:`15762` by `Thomas Fan`_. +:mod:`sklearn.svm` +.................. + +- |API| :class:`svm.SVR` and :class:`svm.OneClassSVM` attributes, `probA_` and + `probB_`, are now deprecated as they were not useful. :pr:`15558` by + `Thomas Fan`_. + +- |Fix| Fix use of custom kernel not taking float entries such as string + kernels in :class:`svm.SVC` and :class:`svm.SVR`. Note that custom kennels + are now expected to validate their input where they previously received + valid numeric arrays. + :pr:`11296` by `Alexandre Gramfort`_ and :user:`Georgi Peev `. + :mod:`sklearn.tree` ................... @@ -122,15 +237,20 @@ Changelog deprecated. :pr:`15806` by :user:`Chiara Marmo `. 
+- |Fix| Fix support of read-only float32 array input in ``predict``, + ``decision_path`` and ``predict_proba`` methods of + :class:`tree.DecisionTreeClassifier`, :class:`tree.ExtraTreeClassifier` and + :class:`ensemble.GradientBoostingClassifier` as well as ``predict`` method of + :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeRegressor`, and + :class:`ensemble.GradientBoostingRegressor`. + :pr:`16331` by :user:`Alexandre Batisse `. + :mod:`sklearn.utils` .................... - |Enhancement| improve error message in :func:`utils.validation.column_or_1d`. :pr:`15926` by :user:`Loïc Estève `. -:mod:`sklearn.feature_selection` -................................ - -- |Enhancement| Added support for multioutput data in :class:`feature_selection.RFE` - and :class:`feature_selection.RFECV`. - :pr:`16103` by :user:`Divyaprabha M `. +- |Enhancement| add warning in :func:`utils.validation.check_array` for + pandas sparse DataFrame. + :pr:`16021` by :user:`Rushabh Vasani `.