From 719fcf3cf1d80f963bb871ee73cbcf47dcd21e18 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 15:07:16 +0200 Subject: [PATCH 001/229] FEA add LearningCurveDisplay --- sklearn/model_selection/__init__.py | 3 + sklearn/model_selection/_plot.py | 176 +++++++++++++++++++++ sklearn/model_selection/tests/test_plot.py | 45 ++++++ 3 files changed, 224 insertions(+) create mode 100644 sklearn/model_selection/_plot.py create mode 100644 sklearn/model_selection/tests/test_plot.py diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index a481f5db72fdf..76dc02e625408 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -32,6 +32,8 @@ from ._search import ParameterGrid from ._search import ParameterSampler +from ._plot import LearningCurveDisplay + if typing.TYPE_CHECKING: # Avoid errors in type checkers (e.g. mypy) for experimental estimators. # TODO: remove this check once the estimator is no longer experimental. @@ -68,6 +70,7 @@ "cross_val_score", "cross_validate", "learning_curve", + "LearningCurveDisplay", "permutation_test_score", "train_test_split", "validation_curve", diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py new file mode 100644 index 0000000000000..da04117c9af79 --- /dev/null +++ b/sklearn/model_selection/_plot.py @@ -0,0 +1,176 @@ +import numpy as np + +from . 
import learning_curve +from ..utils import check_matplotlib_support + + +class LearningCurveDisplay: + def __init__(self, *, train_sizes, train_scores, test_scores, score_name): + self.train_sizes = train_sizes + self.train_scores = train_scores + self.test_scores = test_scores + self.score_name = score_name + + def plot( + self, + ax=None, + *, + is_score=True, + score_name=None, + log_scale=False, + std_display_style="errorbar", + line_kw=None, + fill_between_kw=None, + errorbar_kw=None, + ): + check_matplotlib_support(f"{self.__class__.__name__}.plot") + + import matplotlib.pyplot as plt + + if ax is None: + _, ax = plt.subplots() + + if is_score: + train_scores, test_scores = self.train_scores, self.test_scores + label = "Score" + else: + train_scores, test_scores = -self.train_scores, -self.test_scores + label = "Error" + + if std_display_style == "errorbar": + if errorbar_kw is None: + errorbar_kw = {} + errorbar_train = ax.errorbar( + x=self.train_sizes, + y=train_scores.mean(axis=1), + yerr=train_scores.std(axis=1), + label=f"Training {label}", + **errorbar_kw, + ) + errorbar_test = ax.errorbar( + x=self.train_sizes, + y=test_scores.mean(axis=1), + yerr=test_scores.std(axis=1), + label=f"Testing {label}", + **errorbar_kw, + ) + self.errorbar_ = [errorbar_train, errorbar_test] + self.line_, self.fill_between_ = None, None + elif std_display_style == "fill_between": + if line_kw is None: + line_kw = {} + if fill_between_kw is None: + fill_between_kw = {} + default_fill_between_kw = {"alpha": 0.5} + fill_between_kw = {**default_fill_between_kw, **fill_between_kw} + line_train = ax.plot( + self.train_sizes, + train_scores.mean(axis=1), + label=f"Training {label}", + **line_kw, + ) + fill_between_train = ax.fill_between( + x=self.train_sizes, + y1=train_scores.mean(axis=1) - train_scores.std(axis=1), + y2=train_scores.mean(axis=1) + train_scores.std(axis=1), + **fill_between_kw, + ) + line_test = ax.plot( + self.train_sizes, + test_scores.mean(axis=1), + 
label=f"Testing {label}", + **line_kw, + ) + fill_between_test = ax.fill_between( + x=self.train_sizes, + y1=test_scores.mean(axis=1) - test_scores.std(axis=1), + y2=test_scores.mean(axis=1) + test_scores.std(axis=1), + **fill_between_kw, + ) + self.line_ = line_train + line_test + self.fill_between_ = [fill_between_train, fill_between_test] + self.errorbar_ = None + else: + raise ValueError( + f"Unknown std_display_style: {std_display_style}. Should be one of" + " 'errorbar' or 'fill_between'" + ) + + score_name = self.score_name if score_name is None else score_name + + ax.legend() + if log_scale: + ax.set_xscale("log") + ax.set_xlabel("Number of samples in the training set") + ax.set_ylabel(f"{score_name}") + + self.ax_ = ax + self.figure_ = ax.figure + return self + + @classmethod + def from_estimator( + cls, + estimator, + X, + y, + *, + groups=None, + train_sizes=np.linspace(0.1, 1.0, 5), + cv=None, + scoring=None, + exploit_incremental_learning=False, + n_jobs=None, + pre_dispatch="all", + verbose=0, + shuffle=False, + random_state=None, + error_score=np.nan, + fit_params=None, + ax=None, + is_score=True, + score_name=None, + log_scale=False, + std_display_style="errorbar", + line_kw=None, + fill_between_kw=None, + errorbar_kw=None, + ): + check_matplotlib_support(f"{cls.__name__}.from_estimator") + + score_name = "Score" if score_name is None else score_name + + train_sizes, train_scores, test_scores = learning_curve( + estimator, + X, + y, + groups=groups, + train_sizes=train_sizes, + cv=cv, + scoring=scoring, + exploit_incremental_learning=exploit_incremental_learning, + n_jobs=n_jobs, + pre_dispatch=pre_dispatch, + verbose=verbose, + shuffle=shuffle, + random_state=random_state, + error_score=error_score, + return_times=False, + fit_params=fit_params, + ) + + viz = cls( + train_sizes=train_sizes, + train_scores=train_scores, + test_scores=test_scores, + score_name=score_name, + ) + return viz.plot( + ax=ax, + is_score=is_score, + log_scale=log_scale, + 
std_display_style=std_display_style, + line_kw=line_kw, + fill_between_kw=fill_between_kw, + errorbar_kw=errorbar_kw, + ) diff --git a/sklearn/model_selection/tests/test_plot.py b/sklearn/model_selection/tests/test_plot.py new file mode 100644 index 0000000000000..cbc5f239873ce --- /dev/null +++ b/sklearn/model_selection/tests/test_plot.py @@ -0,0 +1,45 @@ +import pytest + +from sklearn.datasets import load_iris +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils import shuffle +from sklearn.utils._testing import assert_allclose, assert_array_equal + +from sklearn.model_selection import learning_curve +from sklearn.model_selection import LearningCurveDisplay + + +@pytest.fixture +def data(): + return shuffle(*load_iris(return_X_y=True), random_state=0) + + +def test_learnig_curve_display_default_usage(pyplot, data): + """Check the default usage of the LearningCurveDisplay class.""" + X, y = data + estimator = DecisionTreeClassifier(random_state=0) + + train_sizes = [0.3, 0.6, 0.9] + display = LearningCurveDisplay.from_estimator( + estimator, X, y, train_sizes=train_sizes + ) + + import matplotlib as mpl + + assert isinstance(display.errorbar_, list) + for eb in display.errorbar_: + assert isinstance(eb, mpl.container.ErrorbarContainer) + assert eb.get_label() in ["Training Score", "Testing Score"] + assert display.line_ is None + assert display.fill_between_ is None + assert display.score_name == "Score" + assert display.ax_.get_xlabel() == "Number of samples in the training set" + assert display.ax_.get_ylabel() == "Score" + + train_sizes_abs, train_scores, test_scores = learning_curve( + estimator, X, y, train_sizes=train_sizes + ) + + assert_array_equal(display.train_sizes, train_sizes_abs) + assert_allclose(display.train_scores, train_scores) + assert_allclose(display.test_scores, test_scores) From cc9bf3eea5e2fa6a47da85a3d292a7102192018a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 15:10:39 +0200 Subject: [PATCH 
002/229] DOC prepare placeholder for doc --- doc/modules/classes.rst | 6 ++++++ doc/whats_new/v1.2.rst | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index c6838556d50ad..0606aee224ac4 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1247,6 +1247,12 @@ Model validation model_selection.permutation_test_score model_selection.validation_curve +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.LearningCurveDisplay + .. _multiclass_ref: :mod:`sklearn.multiclass`: Multiclass classification diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index ba51e28229462..ee84bf4e33766 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -230,6 +230,14 @@ Changelog :user:`Lucy Liu <lucyleeow>`, and :user:`Guillaume Lemaitre <glemaitre>`. +:mod:`sklearn.model_selection` +.............................. + +- |Feature| Added the class :class:`model_selection.LearningCurveDisplay` + that makes it easy to plot learning curves obtained by the function + :func:`model_selection.learning_curve`. + :pr:`xxx` by :user:`Guillaume Lemaitre <glemaitre>`. + :mod:`sklearn.naive_bayes` .......................... From a1912b1e0a72001a3237d56a8bd3d558573b6aa6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 15:12:07 +0200 Subject: [PATCH 003/229] update whats new --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index ee84bf4e33766..8f728ab0f1959 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -236,7 +236,7 @@ Changelog - |Feature| Added the class :class:`model_selection.LearningCurveDisplay` that makes it easy to plot learning curves obtained by the function :func:`model_selection.learning_curve`. - :pr:`xxx` by :user:`Guillaume Lemaitre <glemaitre>`. + :pr:`24084` by :user:`Guillaume Lemaitre <glemaitre>`. :mod:`sklearn.naive_bayes` .......................... 
From 223082f36ef8dcf3832ba3696a0eb1b8382ee41b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 15:26:32 +0200 Subject: [PATCH 004/229] wip doc --- doc/visualizations.rst | 1 + sklearn/model_selection/_plot.py | 66 +++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/doc/visualizations.rst b/doc/visualizations.rst index 0c6590335232a..94a0798011896 100644 --- a/doc/visualizations.rst +++ b/doc/visualizations.rst @@ -101,3 +101,4 @@ Display Objects metrics.DetCurveDisplay metrics.PrecisionRecallDisplay metrics.RocCurveDisplay + model_selection.LearningCurveDisplay diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index da04117c9af79..3cfd44e14c2e6 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -5,7 +5,71 @@ class LearningCurveDisplay: - def __init__(self, *, train_sizes, train_scores, test_scores, score_name): + """Learning Curve visualization. + + It is recommended to use + :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to + create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance. + All parameters are stored as attributes. + + Read more in the :ref:`User Guide <visualizations>`. + + .. versionadded:: 1.2 + + Parameters + ---------- + train_sizes : ndarray of shape (n_unique_ticks,) + Numbers of training examples that have been used to generate the + learning curve. + + train_scores : ndarray of shape (n_ticks, n_cv_folds) + Scores on training sets. + + test_scores : ndarray of shape (n_ticks, n_cv_folds) + Scores on test set. + + score_name : str, default=None + The name of the score used in `learning_curve`. If `None`, the string + `"Score"` is used. + + Attributes + ---------- + ax_ : matplotlib Axes + Axes with the learning curve. + + figure_ : matplotlib Figure + Figure containing the learning curve. 
+ + errorbar_ : list of matplotlib Artist or None + When the `std_display_style` is `"errorbar"`, this is a list of + `matplotlib.container.ErrorbarContainer` objects. If another style is + used, `errorbar_` is `None`. + + line_ : list of matplotlib Artist or None + When the `std_display_style` is `"fill_between"`, this is a list of + `matplotlib.lines.Line2D` objects corresponding to the mean train and + test scores. If another style is used, `line_` is `None`. + + fill_between_ : list of matplotlib Artist or None + When the `std_display_style` is `"fill_between"`, this is a list of + `matplotlib.collections.PolyCollection` objects. If another style is + used, `fill_between_` is `None`. + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from sklearn.datasets import load_iris + >>> from sklearn.model_selection import LearningCurveDisplay + >>> from sklearn.tree import DecisionTreeClassifier + >>> X, y = load_iris(return_X_y=True) + >>> tree = DecisionTreeClassifier(random_state=0) + >>> display = LearningCurveDisplay.from_estimator(tree, X, y) + >>> display.plot() + <...> + >>> plt.show() + """ + + def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None): self.train_sizes = train_sizes self.train_scores = train_scores self.test_scores = test_scores From d5ed76519a800da3bf4010fd69abc436ef94e41c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 17:15:37 +0200 Subject: [PATCH 005/229] DOC update API docstrings --- sklearn/model_selection/_plot.py | 218 +++++++++++++++++++++++++++++-- 1 file changed, 204 insertions(+), 14 deletions(-) diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index 3cfd44e14c2e6..c36d5cfca2d30 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -28,9 +28,9 @@ class LearningCurveDisplay: test_scores : ndarray of shape (n_ticks, n_cv_folds) Scores on test set. 
- score_name : str, default=None - The name of the score used in `learning_curve`. If `None`, the string - `"Score"` is used. + score_name : str, default="Score" + The name of the score used in `learning_curve`. It will be used to + decorate the y-axis. Attributes ---------- @@ -59,17 +59,20 @@ class LearningCurveDisplay: -------- >>> import matplotlib.pyplot as plt >>> from sklearn.datasets import load_iris - >>> from sklearn.model_selection import LearningCurveDisplay + >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve >>> from sklearn.tree import DecisionTreeClassifier >>> X, y = load_iris(return_X_y=True) >>> tree = DecisionTreeClassifier(random_state=0) - >>> display = LearningCurveDisplay.from_estimator(tree, X, y) + >>> train_sizes, train_scores, test_scores = learning_curve( + ... tree, X, y) + >>> display = LearningCurveDisplay(train_sizes=train_sizes, + ... train_scores=train_scores, test_scores=test_scores) >>> display.plot() <...> >>> plt.show() """ - def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None): + def __init__(self, *, train_sizes, train_scores, test_scores, score_name="Score"): self.train_sizes = train_sizes self.train_scores = train_scores self.test_scores = test_scores @@ -79,14 +82,57 @@ def plot( self, ax=None, *, - is_score=True, - score_name=None, + negate=True, + score_name="Score", log_scale=False, std_display_style="errorbar", line_kw=None, fill_between_kw=None, errorbar_kw=None, ): + """Plot visualization. + + Parameters + ---------- + ax : matplotlib Axes, default=None + Axes object to plot on. If `None`, a new figure and axes is + created. + + negate : bool, default=False + Whether or not to negate the scores obtained through + :func:`~sklearn.model_selection.learning_curve`. This is + particularly useful when using the error denoted by `neg_*` in + `scikit-learn`. + + score_name : str, default="Score" + The name of the score used to decorate the y-axis of the plot. 
+ + log_scale : bool, default=False + Whether or not to use a logarithmic scale for the x-axis. + + std_display_style : {"errorbar", "fill_between"}, default="errorbar" + The style used to display the score standard deviation around the + mean score. + + line_kw : dict, default=None + Additional keyword arguments passed to the `plt.plot` used to draw + the mean score. Ignored when `std_display_style != "fill_between"`. + + fill_between_kw : dict, default=None + Additional keyword arguments passed to the `plt.fill_between` used + to draw the score standard deviation. Ignored when + `std_display_style != "fill_between"`. + + errorbar_kw : dict, default=None + Additional keyword arguments passed to the `plt.errorbar` used to + draw mean score and standard deviation score. Ignored when + `std_display_style != "errorbar"`. + + Returns + ------- + display : :class:`~sklearn.model_selection.LearningCurveDisplay` + Object that stores computed values. + """ check_matplotlib_support(f"{self.__class__.__name__}.plot") import matplotlib.pyplot as plt @@ -94,12 +140,12 @@ def plot( if ax is None: _, ax = plt.subplots() - if is_score: - train_scores, test_scores = self.train_scores, self.test_scores - label = "Score" - else: + if negate: train_scores, test_scores = -self.train_scores, -self.test_scores label = "Error" + else: + train_scores, test_scores = self.train_scores, self.test_scores + label = "Score" if std_display_style == "errorbar": if errorbar_kw is None: @@ -192,7 +238,7 @@ def from_estimator( error_score=np.nan, fit_params=None, ax=None, - is_score=True, + negate=False, score_name=None, log_scale=False, std_display_style="errorbar", @@ -200,6 +246,150 @@ def from_estimator( fill_between_kw=None, errorbar_kw=None, ): + """Create a learning curve display from an estimator. + + Parameters + ---------- + estimator : object type that implements the "fit" and "predict" methods + An object of that type which is cloned for each validation. 
+ + X : array-like of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + Target relative to X for classification or regression; + None for unsupervised learning. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`GroupKFold`). + + train_sizes : array-like of shape (n_ticks,), \ + default=np.linspace(0.1, 1.0, 5) + Relative or absolute numbers of training examples that will be used + to generate the learning curve. If the dtype is float, it is + regarded as a fraction of the maximum size of the training set + (that is determined by the selected validation method), i.e. it has + to be within (0, 1]. Otherwise it is interpreted as absolute sizes + of the training sets. Note that for classification the number of + samples usually have to be big enough to contain at least one + sample from each class. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - int, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For int/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In + all other cases, :class:`KFold` is used. These splitters are + instantiated with `shuffle=False` so the splits will be the same + across calls. + + Refer to the :ref:`User Guide <cross_validation>` for the various + cross-validation strategies that can be used here. 
+ + scoring : str or callable, default=None + A str (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + exploit_incremental_learning : bool, default=False + If the estimator supports incremental learning, this will be + used to speed up fitting for different training set sizes. + + n_jobs : int, default=None + Number of jobs to run in parallel. Training the estimator and + computing the score are parallelized over the different training + and test sets. ``None`` means 1 unless in a + :obj:`joblib.parallel_backend` context. ``-1`` means using all + processors. See :term:`Glossary <n_jobs>` for more details. + + pre_dispatch : int or str, default='all' + Number of predispatched jobs for parallel execution (default is + all). The option can reduce the allocated memory. The str can + be an expression like '2*n_jobs'. + + verbose : int, default=0 + Controls the verbosity: the higher, the more messages. + + shuffle : bool, default=False + Whether to shuffle training data before taking prefixes of it + based on ``train_sizes``. + + random_state : int, RandomState instance or None, default=None + Used when ``shuffle`` is True. Pass an int for reproducible + output across multiple function calls. + See :term:`Glossary <random_state>`. + + error_score : 'raise' or numeric, default=np.nan + Value to assign to the score if an error occurs in estimator + fitting. If set to 'raise', the error is raised. If a numeric value + is given, FitFailedWarning is raised. + + return_times : bool, default=False + Whether to return the fit and score times. + + fit_params : dict, default=None + Parameters to pass to the fit method of the estimator. + + ax : matplotlib Axes, default=None + Axes object to plot on. If `None`, a new figure and axes is + created. + + negate : bool, default=False + Whether or not to negate the scores obtained through + :func:`~sklearn.model_selection.learning_curve`. 
This is + particularly useful when using the error denoted by `neg_*` in + `scikit-learn`. + + score_name : str, default="Score" + The name of the score used to decorate the y-axis of the plot. + + log_scale : bool, default=False + Whether or not to use a logarithmic scale for the x-axis. + + std_display_style : {"errorbar", "fill_between"}, default="errorbar" + The style used to display the score standard deviation around the + mean score. + + line_kw : dict, default=None + Additional keyword arguments passed to the `plt.plot` used to draw + the mean score. Ignored when `std_display_style != "fill_between"`. + + fill_between_kw : dict, default=None + Additional keyword arguments passed to the `plt.fill_between` used + to draw the score standard deviation. Ignored when + `std_display_style != "fill_between"`. + + errorbar_kw : dict, default=None + Additional keyword arguments passed to the `plt.errorbar` used to + draw mean score and standard deviation score. Ignored when + `std_display_style != "errorbar"`. + + Returns + ------- + display : :class:`~sklearn.model_selection.LearningCurveDisplay` + Object that stores computed values. 
+ + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from sklearn.datasets import load_iris + >>> from sklearn.model_selection import LearningCurveDisplay + >>> from sklearn.tree import DecisionTreeClassifier + >>> X, y = load_iris(return_X_y=True) + >>> tree = DecisionTreeClassifier(random_state=0) + >>> LearningCurveDisplay.from_estimator(tree, X, y) + <...> + >>> plt.show() + """ check_matplotlib_support(f"{cls.__name__}.from_estimator") score_name = "Score" if score_name is None else score_name @@ -231,7 +421,7 @@ def from_estimator( ) return viz.plot( ax=ax, - is_score=is_score, + negate=negate, log_scale=log_scale, std_display_style=std_display_style, line_kw=line_kw, From 3a4c7d9b6743c76fd99f8a72bb4a4c4103e6aceb Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 2 Aug 2022 17:56:25 +0200 Subject: [PATCH 006/229] reformat first example --- .../plot_kernel_ridge_regression.py | 42 ++--- sklearn/model_selection/_plot.py | 149 ++++++++++-------- 2 files changed, 101 insertions(+), 90 deletions(-) diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index dd696443d6b31..deac696bbd308 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -189,35 +189,27 @@ # %% # Visualize the learning curves # ----------------------------- +from sklearn.model_selection import LearningCurveDisplay -from sklearn.model_selection import learning_curve - -plt.figure() +_, ax = plt.subplots() svr = SVR(kernel="rbf", C=1e1, gamma=0.1) kr = KernelRidge(kernel="rbf", alpha=0.1, gamma=0.1) -train_sizes, train_scores_svr, test_scores_svr = learning_curve( - svr, - X[:100], - y[:100], - train_sizes=np.linspace(0.1, 1, 10), - scoring="neg_mean_squared_error", - cv=10, -) -train_sizes_abs, train_scores_kr, test_scores_kr = learning_curve( - kr, - X[:100], - y[:100], - train_sizes=np.linspace(0.1, 1, 10), - 
scoring="neg_mean_squared_error", - cv=10, -) -plt.plot(train_sizes, -test_scores_kr.mean(1), "o--", color="g", label="KRR") -plt.plot(train_sizes, -test_scores_svr.mean(1), "o--", color="r", label="SVR") -plt.xlabel("Train size") -plt.ylabel("Mean Squared Error") -plt.title("Learning curves") -plt.legend(loc="best") +common_params = { + "X": X[:100], + "y": y[:100], + "train_sizes": np.linspace(0.1, 1, 10), + "scoring": "neg_mean_squared_error", + "negate": True, + "score_name": "Mean Squared Error", + "cv": 10, + "ax": ax, +} + +LearningCurveDisplay.from_estimator(svr, **common_params) +LearningCurveDisplay.from_estimator(kr, **common_params) +ax.set_title("Learning curves") +ax.legend(handles=ax.get_legend_handles_labels()[0], labels=["SVR", "KRR"]) plt.show() diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index c36d5cfca2d30..71f7b51e7e7cc 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -84,8 +84,9 @@ def plot( *, negate=True, score_name="Score", + score_type="test", log_scale=False, - std_display_style="errorbar", + std_display_style=None, line_kw=None, fill_between_kw=None, errorbar_kw=None, @@ -107,26 +108,29 @@ def plot( score_name : str, default="Score" The name of the score used to decorate the y-axis of the plot. + score_type : {"test", "train", "both"}, default="test" + The type of score to plot. Can be one of `"test"`, `"train"`, or + `"both"`. + log_scale : bool, default=False Whether or not to use a logarithmic scale for the x-axis. - std_display_style : {"errorbar", "fill_between"}, default="errorbar" + std_display_style : {"errorbar", "fill_between"}, default=None The style used to display the score standard deviation around the - mean score. + mean score. If None, no standard deviation representation is + displayed. line_kw : dict, default=None Additional keyword arguments passed to the `plt.plot` used to draw - the mean score. 
Ignored when `std_display_style != "fill_between"`. + the mean score. fill_between_kw : dict, default=None Additional keyword arguments passed to the `plt.fill_between` used - to draw the score standard deviation. Ignored when - `std_display_style != "fill_between"`. + to draw the score standard deviation. errorbar_kw : dict, default=None Additional keyword arguments passed to the `plt.errorbar` used to - draw mean score and standard deviation score. Ignored when - `std_display_style != "errorbar"`. + draw mean score and standard deviation score. Returns ------- @@ -147,64 +151,74 @@ def plot( train_scores, test_scores = self.train_scores, self.test_scores label = "Score" + if std_display_style not in ("errorbar", "fill_between", None): + raise ValueError( + f"Unknown std_display_style: {std_display_style}. Should be one of" + " 'errorbar', 'fill_between', or None." + ) + + if score_type not in ("test", "train", "both"): + raise ValueError( + f"Unknown score_type: {score_type}. Should be one of 'test', " + "'train', or 'both'." 
+ ) + + if score_type == "train": + scores = {f"Training {label}": train_scores} + elif score_type == "test": + scores = {f"Test {label}": test_scores} + else: # score_type == "both" + scores = {f"Training {label}": train_scores, f"Test {label}": test_scores} + + if std_display_style in ("fill_between", None): + # plot the mean score + if line_kw is None: + line_kw = {} + + self.lines_ = [] + for line_label, score in scores.items(): + self.lines_.append( + *ax.plot( + self.train_sizes, + score.mean(axis=1), + label=line_label, + **line_kw, + ) + ) + self.errorbar_ = None + self.fill_between_ = None # overwritten below by fill_between + if std_display_style == "errorbar": if errorbar_kw is None: errorbar_kw = {} - errorbar_train = ax.errorbar( - x=self.train_sizes, - y=train_scores.mean(axis=1), - yerr=train_scores.std(axis=1), - label=f"Training {label}", - **errorbar_kw, - ) - errorbar_test = ax.errorbar( - x=self.train_sizes, - y=test_scores.mean(axis=1), - yerr=test_scores.std(axis=1), - label=f"Testing {label}", - **errorbar_kw, - ) - self.errorbar_ = [errorbar_train, errorbar_test] + + for line_label, score in scores.items(): + self.errobar_.append( + ax.errorbar( + self.train_sizes, + score.mean(axis=1), + score.std(axis=1), + label=line_label, + **errorbar_kw, + ) + ) self.line_, self.fill_between_ = None, None elif std_display_style == "fill_between": - if line_kw is None: - line_kw = {} if fill_between_kw is None: fill_between_kw = {} default_fill_between_kw = {"alpha": 0.5} fill_between_kw = {**default_fill_between_kw, **fill_between_kw} - line_train = ax.plot( - self.train_sizes, - train_scores.mean(axis=1), - label=f"Training {label}", - **line_kw, - ) - fill_between_train = ax.fill_between( - x=self.train_sizes, - y1=train_scores.mean(axis=1) - train_scores.std(axis=1), - y2=train_scores.mean(axis=1) + train_scores.std(axis=1), - **fill_between_kw, - ) - line_test = ax.plot( - self.train_sizes, - test_scores.mean(axis=1), - label=f"Testing {label}", - 
**line_kw, - ) - fill_between_test = ax.fill_between( - x=self.train_sizes, - y1=test_scores.mean(axis=1) - test_scores.std(axis=1), - y2=test_scores.mean(axis=1) + test_scores.std(axis=1), - **fill_between_kw, - ) - self.line_ = line_train + line_test - self.fill_between_ = [fill_between_train, fill_between_test] - self.errorbar_ = None - else: - raise ValueError( - f"Unknown std_display_style: {std_display_style}. Should be one of" - " 'errorbar' or 'fill_between'" - ) + + for line_label, score in scores.items(): + self.fill_between_.append( + ax.fill_between( + self.train_sizes, + score.mean(axis=1) - score.std(axis=1), + score.mean(axis=1) + score.std(axis=1), + label=line_label, + **fill_between_kw, + ) + ) score_name = self.score_name if score_name is None else score_name @@ -240,8 +254,9 @@ def from_estimator( ax=None, negate=False, score_name=None, + score_type="test", log_scale=False, - std_display_style="errorbar", + std_display_style=None, line_kw=None, fill_between_kw=None, errorbar_kw=None, @@ -352,26 +367,29 @@ def from_estimator( score_name : str, default="Score" The name of the score used to decorate the y-axis of the plot. + score_type : {"test", "train", "both"}, default="test" + The type of score to plot. Can be one of `"test"`, `"train"`, or + `"both"`. + log_scale : bool, default=False Whether or not to use a logarithmic scale for the x-axis. - std_display_style : {"errorbar", "fill_between"}, default="errorbar" + std_display_style : {"errorbar", "fill_between"}, default=None The style used to display the score standard deviation around the - mean score. + mean score. If `None, no representation of the standard deviation + is displayed. line_kw : dict, default=None Additional keyword arguments passed to the `plt.plot` used to draw - the mean score. Ignored when `std_display_style != "fill_between"`. + the mean score. 
fill_between_kw : dict, default=None Additional keyword arguments passed to the `plt.fill_between` used - to draw the score standard deviation. Ignored when - `std_display_style != "fill_between"`. + to draw the score standard deviation. errorbar_kw : dict, default=None Additional keyword arguments passed to the `plt.errorbar` used to - draw mean score and standard deviation score. Ignored when - `std_display_style != "errorbar"`. + draw mean score and standard deviation score. Returns ------- @@ -422,6 +440,7 @@ def from_estimator( return viz.plot( ax=ax, negate=negate, + score_type=score_type, log_scale=log_scale, std_display_style=std_display_style, line_kw=line_kw, From a269f7f164e8005d5d361d0b3c47088c2ad3d350 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 3 Aug 2022 16:48:43 +0200 Subject: [PATCH 007/229] DOC refactor example --- doc/modules/learning_curve.rst | 7 +- .../model_selection/plot_learning_curve.py | 370 ++++++++---------- sklearn/model_selection/_plot.py | 19 +- 3 files changed, 183 insertions(+), 213 deletions(-) diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst index 1287fe1b5779c..7377112596a6d 100644 --- a/doc/modules/learning_curve.rst +++ b/doc/modules/learning_curve.rst @@ -94,8 +94,8 @@ The function :func:`validation_curve` can help in this case:: If the training score and the validation score are both low, the estimator will be underfitting. If the training score is high and the validation score is low, the estimator is overfitting and otherwise it is working very well. A low -training score and a high validation score is usually not possible. Underfitting, -overfitting, and a working model are shown in the in the plot below where we vary +training score and a high validation score is usually not possible. Underfitting, +overfitting, and a working model are shown in the in the plot below where we vary the parameter :math:`\gamma` of an SVM on the digits dataset. .. 
figure:: ../auto_examples/model_selection/images/sphx_glr_plot_validation_curve_001.png @@ -112,7 +112,7 @@ Learning curve A learning curve shows the validation and training score of an estimator for varying numbers of training samples. It is a tool to find out how much we benefit from adding more training data and whether the estimator suffers -more from a variance error or a bias error. Consider the following example +more from a variance error or a bias error. Consider the following example where we plot the learning curve of a naive Bayes classifier and an SVM. For the naive Bayes, both the validation score and the training score @@ -148,4 +148,3 @@ average scores on the validation sets):: array([[1. , 0.93..., 1. , 1. , 0.96...], [1. , 0.96..., 1. , 1. , 0.96...], [1. , 0.96..., 1. , 1. , 0.96...]]) - diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 5430f673d76a5..8b8a6861c9984 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -1,218 +1,184 @@ """ -======================== -Plotting Learning Curves -======================== -In the first column, first row the learning curve of a naive Bayes classifier -is shown for the digits dataset. Note that the training score and the -cross-validation score are both not very good at the end. However, the shape -of the curve can be found in more complex datasets very often: the training -score is very high at the beginning and decreases and the cross-validation -score is very low at the beginning and increases. In the second column, first -row we see the learning curve of an SVM with RBF kernel. We can see clearly -that the training score is still around the maximum and the validation score -could be increased with more training samples. The plots in the second row -show the times required by the models to train with various sizes of training -dataset.
The plots in the third row show how much time was required to train -the models for each training sizes. - +========================================================= +Plotting Learning Curves and Checking Models' Scalability +========================================================= + +In this example, we show how to use the class +:class:`~sklearn.model_selection.LearningCurveDisplay` to easily plot learning +curves. In addition, we provide two interpretations of the learning curves +obtained for a naive Bayes and SVM classifiers. + +Then, we go into details by looking at the scalability of these predictive +models by looking at their computation cost and not only at their statistical +accuracy. We therefore draw some conclusions about the scalability of these +models. """ -import numpy as np -import matplotlib.pyplot as plt +# %% +# Learning Curve +# ============== +# +# Learning curves show the effect of adding more samples during the training +# process. The effect is depicted by checking the statistical performance of +# the model in terms of training score and testing score (also called +# cross-validation score). +# +# Here, we compute the learning curve of a naive Bayes classifier and an SVM +# classifier with a RBF kernel using the digits dataset. +from sklearn.datasets import load_digits from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC -from sklearn.datasets import load_digits + +X, y = load_digits(return_X_y=True) +naive_bayes = GaussianNB() +svc = SVC(kernel="rbf", gamma=0.001) + +# %% +# The :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` +# displays the learning curve given the dataset and the predictive model to +# analyze. To get an estimate of the scores uncertainty, this method uses +# a cross-validation procedure.
+import matplotlib.pyplot as plt +import numpy as np +from sklearn.model_selection import LearningCurveDisplay, ShuffleSplit + +fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6), sharey=True) + +common_params = { + "X": X, + "y": y, + "train_sizes": np.linspace(0.1, 1.0, 5), + "cv": ShuffleSplit(n_splits=50, test_size=0.2, random_state=0), + "score_type": "both", + "n_jobs": 4, + "line_kw": {"marker": "o"}, + "std_display_style": "fill_between", + "score_name": "Accuracy", +} + +for ax_idx, estimator in enumerate([naive_bayes, svc]): + LearningCurveDisplay.from_estimator(estimator, **common_params, ax=ax[ax_idx]) + handles, label = ax[ax_idx].get_legend_handles_labels() + ax[ax_idx].legend(handles[:2], ["Training Score", "Cross-Validation Score"]) + ax[ax_idx].set_title(f"Learning Curve for {estimator.__class__.__name__}") + +# %% +# We first analyze the learning curve of the naive Bayes classifier. We observe +# that the training and cross-validation scores are not very good when all the +# samples are used for training. +# +# However, the shape of the curve can be found in more complex datasets very +# often: the training score is very high at the beginning and decreases and the +# cross-validation score is very low at the beginning and increases. +# +# We see another typical learning curve for the SVM classifier with RBF kernel. +# The training score remains high whatever the size of the training dataset. +# The cross-validation, on the other hand, is increasing with the size of the +# training dataset. Indeed, it increases up to a point where the +# cross-validation score reaches a plateau. Observing such a plateau informs us +# that it might not be useful to acquire new data to train the model since the +# generalization performance of the model will not increase anymore.
+# +# Complexity analysis +# =================== +# +# In addition to these learning curves, it is also possible to look at the +# scalability of the predictive models in terms of training and scoring times. +# +# The :class:`~sklearn.model_selection.LearningCurveDisplay` class does not +# provide such information. We need to resort to the +# :func:`~sklearn.model_selection.learning_curve` function instead and make +# the plot manually. + +# %% from sklearn.model_selection import learning_curve -from sklearn.model_selection import ShuffleSplit - - -def plot_learning_curve( - estimator, - title, - X, - y, - axes=None, - ylim=None, - cv=None, - n_jobs=None, - scoring=None, - train_sizes=np.linspace(0.1, 1.0, 5), -): - """ - Generate 3 plots: the test and training learning curve, the training - samples vs fit times curve, the fit times vs score curve. - - Parameters - ---------- - estimator : estimator instance - An estimator instance implementing `fit` and `predict` methods which - will be cloned for each validation. - - title : str - Title for the chart. - - X : array-like of shape (n_samples, n_features) - Training vector, where ``n_samples`` is the number of samples and - ``n_features`` is the number of features. - - y : array-like of shape (n_samples) or (n_samples, n_features) - Target relative to ``X`` for classification or regression; - None for unsupervised learning. - - axes : array-like of shape (3,), default=None - Axes to use for plotting the curves. - - ylim : tuple of shape (2,), default=None - Defines minimum and maximum y-values plotted, e.g. (ymin, ymax). - - cv : int, cross-validation generator or an iterable, default=None - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 5-fold cross-validation, - - integer, to specify the number of folds. - - :term:`CV splitter`, - - An iterable yielding (train, test) splits as arrays of indices. 
- - For integer/None inputs, if ``y`` is binary or multiclass, - :class:`StratifiedKFold` used. If the estimator is not a classifier - or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. - - Refer :ref:`User Guide ` for the various - cross-validators that can be used here. - - n_jobs : int or None, default=None - Number of jobs to run in parallel. - ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. - ``-1`` means using all processors. See :term:`Glossary ` - for more details. - - scoring : str or callable, default=None - A str (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - - train_sizes : array-like of shape (n_ticks,) - Relative or absolute numbers of training examples that will be used to - generate the learning curve. If the ``dtype`` is float, it is regarded - as a fraction of the maximum size of the training set (that is - determined by the selected validation method), i.e. it has to be within - (0, 1]. Otherwise it is interpreted as absolute sizes of the training - sets. Note that for classification the number of samples usually have - to be big enough to contain at least one sample from each class. 
- (default: np.linspace(0.1, 1.0, 5)) - """ - if axes is None: - _, axes = plt.subplots(1, 3, figsize=(20, 5)) - - axes[0].set_title(title) - if ylim is not None: - axes[0].set_ylim(*ylim) - axes[0].set_xlabel("Training examples") - axes[0].set_ylabel("Score") - - train_sizes, train_scores, test_scores, fit_times, _ = learning_curve( - estimator, - X, - y, - scoring=scoring, - cv=cv, - n_jobs=n_jobs, - train_sizes=train_sizes, - return_times=True, - ) - train_scores_mean = np.mean(train_scores, axis=1) - train_scores_std = np.std(train_scores, axis=1) - test_scores_mean = np.mean(test_scores, axis=1) - test_scores_std = np.std(test_scores, axis=1) - fit_times_mean = np.mean(fit_times, axis=1) - fit_times_std = np.std(fit_times, axis=1) - - # Plot learning curve - axes[0].grid() - axes[0].fill_between( - train_sizes, - train_scores_mean - train_scores_std, - train_scores_mean + train_scores_std, - alpha=0.1, - color="r", + +common_params = { + "X": X, + "y": y, + "train_sizes": np.linspace(0.1, 1.0, 5), + "cv": ShuffleSplit(n_splits=50, test_size=0.2, random_state=0), + "n_jobs": 4, + "return_times": True, +} + +train_sizes, _, test_scores_nb, fit_times_nb, score_times_nb = learning_curve( + naive_bayes, **common_params +) +train_sizes, _, test_scores_svm, fit_times_svm, score_times_svm = learning_curve( + svc, **common_params +) + +# %% +fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(16, 12), sharex=True) + +for ax_idx, (fit_times, score_times, estimator) in enumerate( + zip( + [fit_times_nb, fit_times_svm], + [score_times_nb, score_times_svm], + [naive_bayes, svc], ) - axes[0].fill_between( +): + # scalability regarding the fit time + ax[0, ax_idx].plot(train_sizes, fit_times.mean(axis=1), "o-") + ax[0, ax_idx].fill_between( train_sizes, - test_scores_mean - test_scores_std, - test_scores_mean + test_scores_std, - alpha=0.1, - color="g", + fit_times.mean(axis=1) - fit_times.std(axis=1), + fit_times.mean(axis=1) + fit_times.std(axis=1), + alpha=0.3, ) - 
axes[0].plot( - train_sizes, train_scores_mean, "o-", color="r", label="Training score" + ax[0, ax_idx].set_ylabel("Fit time (s)") + ax[0, ax_idx].set_title( + f"Scalability of the {estimator.__class__.__name__} classifier" ) - axes[0].plot( - train_sizes, test_scores_mean, "o-", color="g", label="Cross-validation score" - ) - axes[0].legend(loc="best") - # Plot n_samples vs fit_times - axes[1].grid() - axes[1].plot(train_sizes, fit_times_mean, "o-") - axes[1].fill_between( + # scalability regarding the score time + ax[1, ax_idx].plot(train_sizes, score_times.mean(axis=1), "o-") + ax[1, ax_idx].fill_between( train_sizes, - fit_times_mean - fit_times_std, - fit_times_mean + fit_times_std, - alpha=0.1, + score_times.mean(axis=1) - score_times.std(axis=1), + score_times.mean(axis=1) + score_times.std(axis=1), + alpha=0.3, ) - axes[1].set_xlabel("Training examples") - axes[1].set_ylabel("fit_times") - axes[1].set_title("Scalability of the model") - - # Plot fit_time vs score - fit_time_argsort = fit_times_mean.argsort() - fit_time_sorted = fit_times_mean[fit_time_argsort] - test_scores_mean_sorted = test_scores_mean[fit_time_argsort] - test_scores_std_sorted = test_scores_std[fit_time_argsort] - axes[2].grid() - axes[2].plot(fit_time_sorted, test_scores_mean_sorted, "o-") - axes[2].fill_between( - fit_time_sorted, - test_scores_mean_sorted - test_scores_std_sorted, - test_scores_mean_sorted + test_scores_std_sorted, - alpha=0.1, + ax[1, ax_idx].set_ylabel("Score time (s)") + ax[1, ax_idx].set_xlabel("Number of training samples") + +# %% +# We see that the scalability of the naive Bayes and SVM classifiers is very +# different. The naive Bayes classifier complexity at fit and score time +# remains constant in relation to the number of training samples. In contrast, +# the SVM classifier complexity at fit and score time increases linearly with +# the number of training samples. 
+# +# Subsequently, we can check the trade-off between increased training time and +# the cross-validation score. + +# %% +fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 6)) + +for ax_idx, (fit_times, test_scores, estimator) in enumerate( + zip( + [fit_times_nb, fit_times_svm], + [test_scores_nb, test_scores_svm], + [naive_bayes, svc], + ) +): + ax[ax_idx].plot(fit_times.mean(axis=1), test_scores.mean(axis=1), "o-") + ax[ax_idx].fill_between( + fit_times.mean(axis=1), + test_scores.mean(axis=1) - test_scores.std(axis=1), + test_scores.mean(axis=1) + test_scores.std(axis=1), + alpha=0.3, + ) + ax[ax_idx].set_ylabel("Accuracy") + ax[ax_idx].set_xlabel("Fit time (s)") + ax[ax_idx].set_title( + f"Performance of the {estimator.__class__.__name__} classifier" ) - axes[2].set_xlabel("fit_times") - axes[2].set_ylabel("Score") - axes[2].set_title("Performance of the model") - - return plt - - -fig, axes = plt.subplots(3, 2, figsize=(10, 15)) - -X, y = load_digits(return_X_y=True) - -title = "Learning Curves (Naive Bayes)" -# Cross validation with 50 iterations to get smoother mean test and train -# score curves, each time with 20% data randomly selected as a validation set. -cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=0) - -estimator = GaussianNB() -plot_learning_curve( - estimator, - title, - X, - y, - axes=axes[:, 0], - ylim=(0.7, 1.01), - cv=cv, - n_jobs=4, - scoring="accuracy", -) - -title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)" -# SVC is more expensive so we do a lower number of CV iterations: -cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0) -estimator = SVC(gamma=0.001) -plot_learning_curve( - estimator, title, X, y, axes=axes[:, 1], ylim=(0.7, 1.01), cv=cv, n_jobs=4 -) plt.show() + +# %% +# In these plots, we can look for the inflection point for which the +# cross-validation score does not increase anymore and only the training time +# increases. 
diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index 71f7b51e7e7cc..9da51ad8c5c03 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -28,9 +28,10 @@ class LearningCurveDisplay: test_scores : ndarray of shape (n_ticks, n_cv_folds) Scores on test set. - score_name : str, default="Score" + score_name : str, default=None The name of the score used in `learning_curve`. It will be used to - decorate the y-axis. + decorate the y-axis. If `None`, the generic name `"Score"` will be + used. Attributes ---------- @@ -72,7 +73,7 @@ class LearningCurveDisplay: >>> plt.show() """ - def __init__(self, *, train_sizes, train_scores, test_scores, score_name="Score"): + def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None): self.train_sizes = train_sizes self.train_scores = train_scores self.test_scores = test_scores @@ -83,7 +84,7 @@ def plot( ax=None, *, negate=True, - score_name="Score", + score_name=None, score_type="test", log_scale=False, std_display_style=None, @@ -105,8 +106,9 @@ def plot( particularly useful when using the error denoted by `neg_*` in `scikit-learn`. - score_name : str, default="Score" - The name of the score used to decorate the y-axis of the plot. + score_name : str, default=None + The name of the score used to decorate the y-axis of the plot. If + `None`, the generic name "Score" will be used. score_type : {"test", "train", "both"}, default="test" The type of score to plot. 
Can be one of `"test"`, `"train"`, or @@ -192,6 +194,7 @@ def plot( if errorbar_kw is None: errorbar_kw = {} + self.errorbar_ = [] for line_label, score in scores.items(): self.errobar_.append( ax.errorbar( @@ -209,6 +212,7 @@ def plot( default_fill_between_kw = {"alpha": 0.5} fill_between_kw = {**default_fill_between_kw, **fill_between_kw} + self.fill_between_ = [] for line_label, score in scores.items(): self.fill_between_.append( ax.fill_between( @@ -364,8 +368,9 @@ def from_estimator( particularly useful when using the error denoted by `neg_*` in `scikit-learn`. - score_name : str, default="Score" + score_name : str, default=None The name of the score used to decorate the y-axis of the plot. + If `None`, the generic `"Score"` name will be used. score_type : {"test", "train", "both"}, default="test" The type of score to plot. Can be one of `"test"`, `"train"`, or From 9488d0e4ebafcb6cc6505771cba42e2c06be0879 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 3 Aug 2022 16:54:03 +0200 Subject: [PATCH 008/229] DOC add more information about the display in the user guide --- doc/modules/learning_curve.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst index 7377112596a6d..ec84460d0abbc 100644 --- a/doc/modules/learning_curve.rst +++ b/doc/modules/learning_curve.rst @@ -148,3 +148,17 @@ average scores on the validation sets):: array([[1. , 0.93..., 1. , 1. , 0.96...], [1. , 0.96..., 1. , 1. , 0.96...], [1. , 0.96..., 1. , 1. , 0.96...]]) + +If you intend to plot the learning curves only, the class +:class:`~sklearn.model_selection.LearningCurveDisplay` will be easier to use. +You can use the method +:meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` similarly +to :func:`learning_curve` to generate and plot the learning curve: + +.. 
plot:: + :context: + :align: center + + from sklearn.model_selection import LearningCurveDisplay + LearningCurveDisplay.from_estimator( + SVC(kernel="linear"), X, y, train_sizes=[50, 80, 110], cv=5) From 500b231b1a46e50299014d5d7dc0a22131e628f5 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 4 Aug 2022 10:14:21 +0200 Subject: [PATCH 009/229] fix --- sklearn/model_selection/_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index 9da51ad8c5c03..f8270d1ffdfa9 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -196,7 +196,7 @@ def plot( self.errorbar_ = [] for line_label, score in scores.items(): - self.errobar_.append( + self.errorbar_.append( ax.errorbar( self.train_sizes, score.mean(axis=1), From 62a97dffd6a8f117bc933294a6b82e32be01c15e Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 2 Aug 2022 11:50:07 +0200 Subject: [PATCH 010/229] MAINT Do not version *.pyc* via .gitignore (#24081) --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f4125316d7d41..f3d2dc08ca954 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -*.pyc +*.pyc* *.so *.pyd *~ From 3daffdda3411b3644511a1dc423f692b15e6c4cc Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Tue, 2 Aug 2022 18:36:17 +0200 Subject: [PATCH 011/229] FIX Convergence Warnings in Gaussian process examples (#18019) Co-authored-by: Maren Westermann Co-authored-by: Thomas J.
Fan --- examples/gaussian_process/plot_gpc_xor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gaussian_process/plot_gpc_xor.py b/examples/gaussian_process/plot_gpc_xor.py index 6eebbcf80098e..6e6217dba8b9e 100644 --- a/examples/gaussian_process/plot_gpc_xor.py +++ b/examples/gaussian_process/plot_gpc_xor.py @@ -29,7 +29,7 @@ # fit the model plt.figure(figsize=(10, 5)) -kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0) ** 2] +kernels = [1.0 * RBF(length_scale=1.15), 1.0 * DotProduct(sigma_0=1.0) ** 2] for i, kernel in enumerate(kernels): clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y) From 0ced5caae328c2a596b5eace3246b82ff838f8c6 Mon Sep 17 00:00:00 2001 From: Vincent M Date: Wed, 3 Aug 2022 13:51:52 +0200 Subject: [PATCH 012/229] DOC Ensures that mutual_info_score passes numpydoc validation (#24091) --- sklearn/metrics/cluster/_supervised.py | 8 ++++---- sklearn/tests/test_docstrings.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 2f2f55fcd2156..4012795d7b0c5 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -768,14 +768,14 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): Mutual information, a non-negative value, measured in nats using the natural logarithm. - Notes - ----- - The logarithm used is the natural logarithm (base-e). - See Also -------- adjusted_mutual_info_score : Adjusted against chance Mutual Information. normalized_mutual_info_score : Normalized Mutual Information. + + Notes + ----- + The logarithm used is the natural logarithm (base-e). 
""" if contingency is None: labels_true, labels_pred = check_clusterings(labels_true, labels_pred) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index da45dbb9eca8e..d8194a520540b 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -39,7 +39,6 @@ "sklearn.metrics.cluster._supervised.adjusted_rand_score", "sklearn.metrics.cluster._supervised.entropy", "sklearn.metrics.cluster._supervised.fowlkes_mallows_score", - "sklearn.metrics.cluster._supervised.mutual_info_score", "sklearn.metrics.cluster._supervised.normalized_mutual_info_score", "sklearn.metrics.cluster._supervised.pair_confusion_matrix", "sklearn.metrics.cluster._supervised.rand_score", From 276739633954a09e9b1b43c6db57291424986575 Mon Sep 17 00:00:00 2001 From: Vincent M Date: Wed, 3 Aug 2022 13:53:07 +0200 Subject: [PATCH 013/229] DOC Ensures that normalized_mutual_info_score passes numpydoc validation(#24093) Co-authored-by: Guillaume Lemaitre --- sklearn/tests/test_docstrings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index d8194a520540b..6365309513939 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -39,7 +39,6 @@ "sklearn.metrics.cluster._supervised.adjusted_rand_score", "sklearn.metrics.cluster._supervised.entropy", "sklearn.metrics.cluster._supervised.fowlkes_mallows_score", - "sklearn.metrics.cluster._supervised.normalized_mutual_info_score", "sklearn.metrics.cluster._supervised.pair_confusion_matrix", "sklearn.metrics.cluster._supervised.rand_score", "sklearn.metrics.cluster._supervised.v_measure_score", From b56b3669d565f15e77ed93d2de70cd5233ad08dd Mon Sep 17 00:00:00 2001 From: Maascha <63260880+Maascha@users.noreply.github.com> Date: Wed, 3 Aug 2022 16:06:28 +0200 Subject: [PATCH 014/229] DOC spectral biclustering: corrected contradiction of warning and documentation for parameter method (#24098) --- 
sklearn/cluster/_bicluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index a360802009f2c..c8b46fa38c6d1 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -385,7 +385,7 @@ class SpectralBiclustering(BaseSpectral): default is 'bistochastic'. .. warning:: - if `method='log'`, the data must be sparse. + if `method='log'`, the data must not be sparse. n_components : int, default=6 Number of singular vectors to check. From 544cb6cdbbcfad0a6117598bb028d24bd446518f Mon Sep 17 00:00:00 2001 From: Vincent M Date: Wed, 3 Aug 2022 16:23:33 +0200 Subject: [PATCH 015/229] DOC Ensures that `pair_confusion_matrix` passes numpydoc validation (#24094) Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/cluster/_supervised.py | 20 ++++++++++---------- sklearn/tests/test_docstrings.py | 1 - 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 4012795d7b0c5..3add417716089 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -159,7 +159,7 @@ def contingency_matrix( def pair_confusion_matrix(labels_true, labels_pred): - """Pair confusion matrix arising from two clusterings. + """Pair confusion matrix arising from two clusterings [1]_. The pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix between two clusterings by considering all pairs of samples and counting @@ -188,9 +188,15 @@ def pair_confusion_matrix(labels_true, labels_pred): See Also -------- - rand_score: Rand Score - adjusted_rand_score: Adjusted Rand Score - adjusted_mutual_info_score: Adjusted Mutual Information + rand_score: Rand Score. + adjusted_rand_score: Adjusted Rand Score. + adjusted_mutual_info_score: Adjusted Mutual Information. + + References + ---------- + .. [1] :doi:`Hubert, L., Arabie, P. "Comparing partitions." 
+ Journal of Classification 2, 193–218 (1985). + <10.1007/BF01908075>` Examples -------- @@ -211,12 +217,6 @@ def pair_confusion_matrix(labels_true, labels_pred): [0, 2]]... Note that the matrix is not symmetric. - - References - ---------- - .. L. Hubert and P. Arabie, Comparing Partitions, Journal of - Classification 1985 - https://link.springer.com/article/10.1007%2FBF01908075 """ labels_true, labels_pred = check_clusterings(labels_true, labels_pred) n_samples = np.int64(labels_true.shape[0]) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index 6365309513939..88d37ef7b232d 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -39,7 +39,6 @@ "sklearn.metrics.cluster._supervised.adjusted_rand_score", "sklearn.metrics.cluster._supervised.entropy", "sklearn.metrics.cluster._supervised.fowlkes_mallows_score", - "sklearn.metrics.cluster._supervised.pair_confusion_matrix", "sklearn.metrics.cluster._supervised.rand_score", "sklearn.metrics.cluster._supervised.v_measure_score", "sklearn.metrics.pairwise.pairwise_distances_chunked", From e8e46685a99f2aa73bfc2cd0cbce306ec8f32b63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 4 Aug 2022 11:27:22 +0200 Subject: [PATCH 016/229] FIX utils.multiclass.type_of_target with numpy 1.24 dev (#24044) Co-authored-by: Julien Jerphanion --- sklearn/utils/multiclass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 5311076e64eb8..c72846f9aa323 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -150,7 +150,7 @@ def is_multilabel(y): warnings.simplefilter("error", np.VisibleDeprecationWarning) try: y = np.asarray(y) - except np.VisibleDeprecationWarning: + except (np.VisibleDeprecationWarning, ValueError): # dtype=object should be provided explicitly for ragged arrays, # see NEP 34 y = np.array(y, dtype=object) @@ -292,7 +292,7 @@ 
def type_of_target(y, input_name=""): warnings.simplefilter("error", np.VisibleDeprecationWarning) try: y = np.asarray(y) - except np.VisibleDeprecationWarning: + except (np.VisibleDeprecationWarning, ValueError): # dtype=object should be provided explicitly for ragged arrays, # see NEP 34 y = np.asarray(y, dtype=object) From 57c90462b281f32f6fc88fb19d8069533d58741c Mon Sep 17 00:00:00 2001 From: Vincent M Date: Thu, 4 Aug 2022 16:15:24 +0200 Subject: [PATCH 017/229] DOC Ensures `v_measure_score` passes numpydoc validation (#24097) --- sklearn/metrics/cluster/_supervised.py | 20 +++++++++----------- sklearn/tests/test_docstrings.py | 1 - 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 3add417716089..3a9121b3fd1e0 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -642,16 +642,15 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0): measure the agreement of two independent label assignments strategies on the same dataset when the real ground truth is not known. - Read more in the :ref:`User Guide `. Parameters ---------- labels_true : int array, shape = [n_samples] - ground truth class labels to be used as a reference + Ground truth class labels to be used as a reference. labels_pred : array-like of shape (n_samples,) - cluster labels to evaluate + Cluster labels to evaluate. beta : float, default=1.0 Ratio of weight attributed to ``homogeneity`` vs ``completeness``. @@ -662,7 +661,13 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0): Returns ------- v_measure : float - score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling + Score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling. + + See Also + -------- + homogeneity_score : Homogeneity metric of cluster labeling. + completeness_score : Completeness metric of cluster labeling. 
+ normalized_mutual_info_score : Normalized Mutual Information. References ---------- @@ -671,15 +676,8 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0): conditional entropy-based external cluster evaluation measure `_ - See Also - -------- - homogeneity_score - completeness_score - normalized_mutual_info_score - Examples -------- - Perfect labelings are both homogeneous and complete, hence have score 1.0:: >>> from sklearn.metrics.cluster import v_measure_score diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index 88d37ef7b232d..5b05e4ae04d9a 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -40,7 +40,6 @@ "sklearn.metrics.cluster._supervised.entropy", "sklearn.metrics.cluster._supervised.fowlkes_mallows_score", "sklearn.metrics.cluster._supervised.rand_score", - "sklearn.metrics.cluster._supervised.v_measure_score", "sklearn.metrics.pairwise.pairwise_distances_chunked", "sklearn.preprocessing._data.maxabs_scale", "sklearn.preprocessing._data.scale", From 91b0bef39ca12c0ef0ca4807580c4d0fce8ed281 Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Fri, 5 Aug 2022 08:11:21 +0200 Subject: [PATCH 018/229] TST use global_random_seed in sklearn/cluster/tests/test_bicluster.py (#23927) Co-authored-by: Maren Westermann --- sklearn/cluster/tests/test_bicluster.py | 55 +++++++++++++++---------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 184fe3891804e..b7d189d660740 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -60,7 +60,7 @@ def _test_shape_indices(model): assert len(j_ind) == n -def test_spectral_coclustering(): +def test_spectral_coclustering(global_random_seed): # Test Dhillon's Spectral CoClustering on a simple problem. 
param_grid = { "svd_method": ["randomized", "arpack"], @@ -69,14 +69,15 @@ def test_spectral_coclustering(): "init": ["k-means++"], "n_init": [10], } - random_state = 0 - S, rows, cols = make_biclusters((30, 30), 3, noise=0.5, random_state=random_state) + S, rows, cols = make_biclusters( + (30, 30), 3, noise=0.1, random_state=global_random_seed + ) S -= S.min() # needs to be nonnegative before making it sparse S = np.where(S < 1, 0, S) # threshold some values for mat in (S, csr_matrix(S)): for kwargs in ParameterGrid(param_grid): model = SpectralCoclustering( - n_clusters=3, random_state=random_state, **kwargs + n_clusters=3, random_state=global_random_seed, **kwargs ) model.fit(mat) @@ -88,9 +89,11 @@ def test_spectral_coclustering(): _test_shape_indices(model) -def test_spectral_biclustering(): +def test_spectral_biclustering(global_random_seed): # Test Kluger methods on a checkerboard dataset. - S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=0) + S, rows, cols = make_checkerboard( + (30, 30), 3, noise=0.5, random_state=global_random_seed + ) non_default_params = { "method": ["scale", "log"], @@ -107,7 +110,7 @@ def test_spectral_biclustering(): n_clusters=3, n_init=3, init="k-means++", - random_state=0, + random_state=global_random_seed, ) model.set_params(**dict([(param_name, param_value)])) @@ -145,8 +148,8 @@ def _do_bistochastic_test(scaled): assert_almost_equal(scaled.sum(axis=0).mean(), scaled.sum(axis=1).mean(), decimal=1) -def test_scale_normalize(): - generator = np.random.RandomState(0) +def test_scale_normalize(global_random_seed): + generator = np.random.RandomState(global_random_seed) X = generator.rand(100, 100) for mat in (X, csr_matrix(X)): scaled, _, _ = _scale_normalize(mat) @@ -155,8 +158,8 @@ def test_scale_normalize(): assert issparse(scaled) -def test_bistochastic_normalize(): - generator = np.random.RandomState(0) +def test_bistochastic_normalize(global_random_seed): + generator = 
np.random.RandomState(global_random_seed) X = generator.rand(100, 100) for mat in (X, csr_matrix(X)): scaled = _bistochastic_normalize(mat) @@ -165,24 +168,24 @@ def test_bistochastic_normalize(): assert issparse(scaled) -def test_log_normalize(): +def test_log_normalize(global_random_seed): # adding any constant to a log-scaled matrix should make it # bistochastic - generator = np.random.RandomState(0) + generator = np.random.RandomState(global_random_seed) mat = generator.rand(100, 100) scaled = _log_normalize(mat) + 1 _do_bistochastic_test(scaled) -def test_fit_best_piecewise(): - model = SpectralBiclustering(random_state=0) +def test_fit_best_piecewise(global_random_seed): + model = SpectralBiclustering(random_state=global_random_seed) vectors = np.array([[0, 0, 0, 1, 1, 1], [2, 2, 2, 3, 3, 3], [0, 1, 2, 3, 4, 5]]) best = model._fit_best_piecewise(vectors, n_best=2, n_clusters=2) assert_array_equal(best, vectors[:2]) -def test_project_and_cluster(): - model = SpectralBiclustering(random_state=0) +def test_project_and_cluster(global_random_seed): + model = SpectralBiclustering(random_state=global_random_seed) data = np.array([[1, 1, 1], [1, 1, 1], [3, 6, 3], [3, 6, 3]]) vectors = np.array([[1, 0], [0, 1], [0, 0]]) for mat in (data, csr_matrix(data)): @@ -190,19 +193,27 @@ def test_project_and_cluster(): assert_almost_equal(v_measure_score(labels, [0, 0, 1, 1]), 1.0) -def test_perfect_checkerboard(): +def test_perfect_checkerboard(global_random_seed): # XXX Previously failed on build bot (not reproducible) - model = SpectralBiclustering(3, svd_method="arpack", random_state=0) + model = SpectralBiclustering( + 3, svd_method="arpack", random_state=global_random_seed + ) - S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0) + S, rows, cols = make_checkerboard( + (30, 30), 3, noise=0, random_state=global_random_seed + ) model.fit(S) assert consensus_score(model.biclusters_, (rows, cols)) == 1 - S, rows, cols = make_checkerboard((40, 30), 3, 
noise=0, random_state=0) + S, rows, cols = make_checkerboard( + (40, 30), 3, noise=0, random_state=global_random_seed + ) model.fit(S) assert consensus_score(model.biclusters_, (rows, cols)) == 1 - S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) + S, rows, cols = make_checkerboard( + (30, 40), 3, noise=0, random_state=global_random_seed + ) model.fit(S) assert consensus_score(model.biclusters_, (rows, cols)) == 1 From ecd95cce189cf92986ea9a1e05b19c9fc9549ab2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 5 Aug 2022 09:09:39 +0200 Subject: [PATCH 019/229] MAINT fix the way to call stats.mode (#23633) Co-authored-by: Olivier Grisel Co-authored-by: Meekail Zain <34613774+Micky774@users.noreply.github.com> Co-authored-by: Thomas J. Fan --- sklearn/impute/_base.py | 4 ++-- sklearn/neighbors/_classification.py | 4 ++-- sklearn/utils/fixes.py | 7 +++++++ sklearn/utils/tests/test_extmath.py | 4 ++-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 1842a0527891a..225ae249b2107 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -9,10 +9,10 @@ import numpy as np import numpy.ma as ma from scipy import sparse as sp -from scipy import stats from ..base import BaseEstimator, TransformerMixin from ..utils._param_validation import StrOptions +from ..utils.fixes import _mode from ..utils.sparsefuncs import _get_median from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -52,7 +52,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - mode = stats.mode(array) + mode = _mode(array) most_frequent_value = mode[0][0] most_frequent_count = mode[1][0] else: diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index 25ee67728e1e7..eebd615b2491c 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -10,7 
+10,7 @@ from numbers import Integral import numpy as np -from scipy import stats +from ..utils.fixes import _mode from ..utils.extmath import weighted_mode from ..utils.validation import _is_arraylike, _num_samples @@ -249,7 +249,7 @@ def predict(self, X): y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype) for k, classes_k in enumerate(classes_): if weights is None: - mode, _ = stats.mode(_y[neigh_ind, k], axis=1) + mode, _ = _mode(_y[neigh_ind, k], axis=1) else: mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index b0074ae7e3a18..cdd63e00cd381 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -163,3 +163,10 @@ def threadpool_info(): threadpool_info.__doc__ = threadpoolctl.threadpool_info.__doc__ + + +# TODO: Remove when SciPy 1.9 is the minimum supported version +def _mode(a, axis=0): + if sp_version >= parse_version("1.9.0"): + return scipy.stats.mode(a, axis=axis, keepdims=True) + return scipy.stats.mode(a, axis=axis) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 07a553c8cf09d..14e541bbef2dc 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -6,7 +6,6 @@ import numpy as np from scipy import sparse from scipy import linalg -from scipy import stats from scipy.sparse.linalg import eigsh from scipy.special import expit @@ -19,6 +18,7 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import skip_if_32bit +from sklearn.utils.fixes import _mode from sklearn.utils.extmath import density, _safe_accumulator_op from sklearn.utils.extmath import randomized_svd, _randomized_eigsh @@ -56,7 +56,7 @@ def test_uniform_weights(): weights = np.ones(x.shape) for axis in (None, 0, 1): - mode, score = stats.mode(x, axis) + mode, score = _mode(x, axis) mode2, score2 = weighted_mode(x, weights, 
axis=axis) assert_array_equal(mode, mode2) From 09f9f22c10cbd34bb3af278b83658894603491c1 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 5 Aug 2022 03:10:37 -0400 Subject: [PATCH 020/229] CI Set MACOSX_DEPLOYMENT_TARGET=10.9 (#23833) --- build_tools/github/build_wheels.sh | 4 +--- doc/whats_new/v1.1.rst | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/build_tools/github/build_wheels.sh b/build_tools/github/build_wheels.sh index 5d379bf155146..647b47492774b 100755 --- a/build_tools/github/build_wheels.sh +++ b/build_tools/github/build_wheels.sh @@ -18,9 +18,7 @@ if [[ "$RUNNER_OS" == "macOS" ]]; then export MACOSX_DEPLOYMENT_TARGET=12.0 OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" else - # Currently, the oldest supported macos version is: High Sierra / 10.13. - # Note that Darwin_17 == High Sierra / 10.13. - export MACOSX_DEPLOYMENT_TARGET=10.13 + export MACOSX_DEPLOYMENT_TARGET=10.9 OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2" fi diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 20801abbcdcbc..affdaeb2dd5e4 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -19,6 +19,9 @@ Changelog have been changed in 1.1. :pr:`23990` by :user:`Julien Jerphanion `. +- |Fix| Wheels are now available for MacOS 10.9 and greater. :pr:`23833` by + `Thomas Fan`_. + :mod:`sklearn.base` ...................... From 4122f0af3aec567f13ee319824f6412372ff1ca7 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Fri, 5 Aug 2022 03:11:39 -0400 Subject: [PATCH 021/229] FIX Fixes OrdinalEncoder.inverse_tranform nan encoded values (#24087) --- doc/whats_new/v1.1.rst | 7 +++ sklearn/preprocessing/_encoders.py | 14 +++-- sklearn/preprocessing/tests/test_encoders.py | 61 ++++++++++++++++++++ 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index affdaeb2dd5e4..409448270ec27 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -36,6 +36,13 @@ Changelog a node if there are duplicates in the dataset. :pr:`23395` by :user:`Jérémie du Boisberranger `. +:mod:`sklearn.preprocessing` +............................ + +- |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` correctly handles + use cases where `unknown_value` or `encoded_missing_value` is `nan`. :pr:`24087` + by `Thomas Fan`_. + .. _changes_1_1_1: Version 1.1.1 diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 60a73373235f0..4e5fedd00ea81 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -1371,19 +1371,23 @@ def inverse_transform(self, X): found_unknown = {} for i in range(n_features): - labels = X[:, i].astype("int64", copy=False) + labels = X[:, i] # replace values of X[:, i] that were nan with actual indices if i in self._missing_indices: - X_i_mask = _get_mask(X[:, i], self.encoded_missing_value) + X_i_mask = _get_mask(labels, self.encoded_missing_value) labels[X_i_mask] = self._missing_indices[i] if self.handle_unknown == "use_encoded_value": - unknown_labels = labels == self.unknown_value - X_tr[:, i] = self.categories_[i][np.where(unknown_labels, 0, labels)] + unknown_labels = _get_mask(labels, self.unknown_value) + + known_labels = ~unknown_labels + X_tr[known_labels, i] = self.categories_[i][ + labels[known_labels].astype("int64", copy=False) + ] found_unknown[i] = unknown_labels else: - X_tr[:, i] = self.categories_[i][labels] + X_tr[:, i] 
= self.categories_[i][labels.astype("int64", copy=False)] # insert None values for unknown values if found_unknown: diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index 54e350c8d7acd..c439faf50ee7d 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -1837,6 +1837,15 @@ def test_ordinal_encoder_unknown_missing_interaction(): X_test_trans = oe.transform(X_test) assert_allclose(X_test_trans, [[np.nan], [-3]]) + # Non-regression test for #24082 + X_roundtrip = oe.inverse_transform(X_test_trans) + + # np.nan is unknown so it maps to None + assert X_roundtrip[0][0] is None + + # -3 is the encoded missing value so it maps back to nan + assert np.isnan(X_roundtrip[1][0]) + @pytest.mark.parametrize("with_pandas", [True, False]) def test_ordinal_encoder_encoded_missing_value_error(with_pandas): @@ -1862,3 +1871,55 @@ def test_ordinal_encoder_encoded_missing_value_error(with_pandas): with pytest.raises(ValueError, match=error_msg): oe.fit(X) + + +@pytest.mark.parametrize( + "X_train, X_test_trans_expected, X_roundtrip_expected", + [ + ( + # missing value is not in training set + # inverse transform will considering encoded nan as unknown + np.array([["a"], ["1"]], dtype=object), + [[0], [np.nan], [np.nan]], + np.asarray([["1"], [None], [None]], dtype=object), + ), + ( + # missing value in training set, + # inverse transform will considering encoded nan as missing + np.array([[np.nan], ["1"], ["a"]], dtype=object), + [[0], [np.nan], [np.nan]], + np.asarray([["1"], [np.nan], [np.nan]], dtype=object), + ), + ], +) +def test_ordinal_encoder_unknown_missing_interaction_both_nan( + X_train, X_test_trans_expected, X_roundtrip_expected +): + """Check transform when unknown_value and encoded_missing_value is nan. + + Non-regression test for #24082. 
+ """ + oe = OrdinalEncoder( + handle_unknown="use_encoded_value", + unknown_value=np.nan, + encoded_missing_value=np.nan, + ).fit(X_train) + + X_test = np.array([["1"], [np.nan], ["b"]]) + X_test_trans = oe.transform(X_test) + + # both nan and unknown are encoded as nan + assert_allclose(X_test_trans, X_test_trans_expected) + X_roundtrip = oe.inverse_transform(X_test_trans) + + n_samples = X_roundtrip_expected.shape[0] + for i in range(n_samples): + expected_val = X_roundtrip_expected[i, 0] + val = X_roundtrip[i, 0] + + if expected_val is None: + assert val is None + elif is_scalar_nan(expected_val): + assert np.isnan(val) + else: + assert val == expected_val From 3bf869cd13025499ed5d971ab5599baa338d1bc2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 5 Aug 2022 14:44:22 +0200 Subject: [PATCH 022/229] move some issues from 1.2 to 1.1 --- doc/whats_new/v1.1.rst | 61 +++++++++++++++++++++++++++++++++++++++++- doc/whats_new/v1.2.rst | 38 -------------------------- 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 409448270ec27..1f93adf846767 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -9,6 +9,19 @@ Version 1.1.2 **In Development** +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +- |Fix| :class:`manifold.TSNE` now throws a `ValueError` when fit with + `perplexity>=n_samples` to ensure mathematical correctness of the algorithm. + :pr:`10805` by :user:`Mathias Andersen ` and + :pr:`23471` by :user:`Meekail Zain `. + Changelog --------- @@ -23,7 +36,7 @@ Changelog `Thomas Fan`_. :mod:`sklearn.base` -...................... +................... 
- |Fix| The `get_params` method of the :class:`BaseEstimator` class now supports estimators with `type`-type params that have the `get_params` method. @@ -36,6 +49,45 @@ Changelog a node if there are duplicates in the dataset. :pr:`23395` by :user:`Jérémie du Boisberranger `. +:mod:`sklearn.feature_selection` +................................ + +- |Fix| :class:`feature_selection.SelectFromModel` defaults to selection + threshold 1e-5 when the estimator is either :class:`linear_model.ElasticNet` + or :class:`linear_model.ElasticNetCV` with `l1_ratio` equals 1 or + :class:`linear_model.LassoCV`. + :pr:`23636` by :user:`Hao Chun Chang `. + +:mod:`sklearn.impute` +..................... + +- |Fix| :class:`impute.SimpleImputer` uses the dtype seen in `fit` for + `transform` when the dtype is object. :pr:`22063` by `Thomas Fan`_. + +:mod:`sklearn.linear_model` +........................... + +- |Fix| Use dtype-aware tolerances for the validation of gram matrices (passed by users + or precomputed). :pr:`22059` by :user:`Malte S. Kurz `. + +- |Fix| Fixed an error in :class:`linear_model.LogisticRegression` with + `solver="newton-cg"`, `fit_intercept=True`, and a single feature. :pr:`23608` + by `Tom Dupre la Tour`_. + +:mod:`sklearn.manifold` +....................... + +- |Fix| :class:`manifold.TSNE` now throws a `ValueError` when fit with + `perplexity>=n_samples` to ensure mathematical correctness of the algorithm. + :pr:`10805` by :user:`Mathias Andersen ` and + :pr:`23471` by :user:`Meekail Zain `. + +:mod:`sklearn.metrics` +...................... + +- |Fix| Fixed error message of :class:`metrics.coverage_error` for 1D array input. + :pr:`23548` by :user:`Hao Chun Chang `. + :mod:`sklearn.preprocessing` ............................ @@ -43,6 +95,13 @@ Changelog use cases where `unknown_value` or `encoded_missing_value` is `nan`. :pr:`24087` by `Thomas Fan`_. +:mod:`sklearn.tree` +................... 
+ +- |Fix| Fixed invalid memory access bug during fit in + :class:`tree.DecisionTreeRegressor` and :class:`tree.DecisionTreeClassifier`. + :pr:`23273` by `Thomas Fan`_. + .. _changes_1_1_1: Version 1.1.1 diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 8f728ab0f1959..be8859a7e3e01 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -25,11 +25,6 @@ random sampling procedures. or `'lobpcg'` solvers. This change improves numerical stability of the solver, but may result in a different model. -- |Fix| :class:`manifold.TSNE` now throws a `ValueError` when fit with - `perplexity>=n_samples` to ensure mathematical correctness of the algorithm. - :pr:`10805` by :user:`Mathias Andersen ` and - :pr:`23471` by :user:`Meekail Zain ` - - |Enhancement| :class:`linear_model.GammaRegressor`, :class:`linear_model.PoissonRegressor` and :class:`linear_model.TweedieRegressor` can reach higher precision with the lbfgs solver, in particular when `tol` is set @@ -163,30 +158,9 @@ Changelog :pr:`11860` by :user:`Pierre Ablin `, :pr:`22527` by :user:`Meekail Zain ` and `Thomas Fan`_. -:mod:`sklearn.feature_selection` -................................ -- |Fix| :class:`feature_selection.SelectFromModel` defaults to selection - threshold 1e-5 when the estimator is either :class:`linear_model.ElasticNet` - or :class:`linear_model.ElasticNetCV` with `l1_ratio` equals 1 or - :class:`linear_model.LassoCV`. :pr:`23636` by :user:`Hao Chun Chang - ` - -:mod:`sklearn.impute` -..................... - -- |Fix| :class:`impute.SimpleImputer` uses the dtype seen in `fit` for - `transform` when the dtype is object. :pr:`22063` by `Thomas Fan`_. - :mod:`sklearn.linear_model` ........................... -- |Fix| Use dtype-aware tolerances for the validation of gram matrices (passed by users - or precomputed). :pr:`22059` by :user:`Malte S. Kurz `. 
- -- |Fix| Fixed an error in :class:`linear_model.LogisticRegression` with - `solver="newton-cg"`, `fit_intercept=True`, and a single feature. :pr:`23608` - by `Tom Dupre la Tour`_. - - |Enhancement| :class:`linear_model.GammaRegressor`, :class:`linear_model.PoissonRegressor` and :class:`linear_model.TweedieRegressor` can reach higher precision with the lbfgs solver, in particular when `tol` is set @@ -217,9 +191,6 @@ Changelog :pr:`22710` by :user:`Conroy Trinh ` and :pr:`23461` by :user:`Meekail Zain `. -- |Fix| Fixed error message of :class:`metrics.coverage_error` for 1D array input. - :pr:`23548` by :user:`Hao Chun Chang `. - :mod:`sklearn.multioutput` .......................... @@ -275,10 +246,6 @@ Changelog - |Enhancement| :func:`tree.plot_tree`, :func:`tree.export_graphviz` now uses a lower case `x[i]` to represent feature `i`. :pr:`23480` by `Thomas Fan`_. -- |Fix| Fixed invalid memory access bug during fit in - :class:`tree.DecisionTreeRegressor` and :class:`tree.DecisionTreeClassifier`. - :pr:`23273` by `Thomas Fan`_. - :mod:`sklearn.utils` .................... @@ -306,11 +273,6 @@ Changelog `eigen_tol="auto"` in version 1.3. :pr:`23210` by :user:`Meekail Zain `. -- |Fix| :class:`manifold.TSNE` now throws a `ValueError` when fit with - `perplexity>=n_samples` to ensure mathematical correctness of the algorithm. - :pr:`10805` by :user:`Mathias Andersen ` and - :pr:`23471` by :user:`Meekail Zain ` - :mod:`sklearn.naive_bayes` .......................... From 82f23e1baf8f98afa1192630e957a7e51ed51f71 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 5 Aug 2022 14:55:07 +0200 Subject: [PATCH 023/229] DOC add date for release 1.1.2 --- doc/templates/index.html | 2 ++ doc/whats_new/v1.1.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/templates/index.html b/doc/templates/index.html index d2bd879958a3b..755ff52821938 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -166,6 +166,8 @@

News

  • On-going development: What's new (Changelog)
  • +
  • August 2022. scikit-learn 1.1.2 is available for download (Changelog). +
  • May 2022. scikit-learn 1.1.1 is available for download (Changelog).
  • May 2022. scikit-learn 1.1.0 is available for download (Changelog). diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 1f93adf846767..4417987640f94 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -7,7 +7,7 @@ Version 1.1.2 ============= -**In Development** +**August 2022** Changed models -------------- From 9380cf1e1269ca5872ad60fa5ae5a1a76de4e521 Mon Sep 17 00:00:00 2001 From: Ravi Makhija <87270246+ravimakhija@users.noreply.github.com> Date: Fri, 5 Aug 2022 09:32:20 -0400 Subject: [PATCH 024/229] Fix Taylor expansion documentation (#24122) Co-authored-by: Julien Jerphanion --- doc/modules/ensemble.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 7d64a0e91181c..80254c0d55dc7 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -681,7 +681,7 @@ approximated as follows: .. note:: Briefly, a first-order Taylor approximation says that - :math:`l(z) \approx l(a) + (z - a) \frac{\partial l(a)}{\partial a}`. + :math:`l(z) \approx l(a) + (z - a) \frac{\partial l}{\partial z}(a)`. 
Here, :math:`z` corresponds to :math:`F_{m - 1}(x_i) + h_m(x_i)`, and :math:`a` corresponds to :math:`F_{m-1}(x_i)` From f7ddb6b7e58fea2d03cf0cae038e960a2b6abb0c Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Fri, 5 Aug 2022 09:45:00 -0400 Subject: [PATCH 025/229] DOC Updates linear_model.Perceptron regarding t_ (#24073) --- sklearn/linear_model/_passive_aggressive.py | 4 ++-- sklearn/linear_model/_perceptron.py | 2 +- sklearn/linear_model/_stochastic_gradient.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index 0b029fe781376..fd59fe1373c7c 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -141,7 +141,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. loss_function_ : callable Loss function used by the algorithm. @@ -428,7 +428,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. See Also -------- diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py index d47e17a92d08b..88ba6a7ccf223 100644 --- a/sklearn/linear_model/_perceptron.py +++ b/sklearn/linear_model/_perceptron.py @@ -136,7 +136,7 @@ class Perceptron(BaseSGDClassifier): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. 
See Also -------- diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 7552fcc8f88b6..49da69975c12e 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -1138,7 +1138,7 @@ class SGDClassifier(BaseSGDClassifier): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. n_features_in_ : int Number of features seen during :term:`fit`. @@ -1912,7 +1912,7 @@ class SGDRegressor(BaseSGDRegressor): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. n_features_in_ : int Number of features seen during :term:`fit`. @@ -2123,7 +2123,7 @@ class SGDOneClassSVM(BaseSGD, OutlierMixin): t_ : int Number of weight updates performed during training. - Same as ``(n_iter_ * n_samples)``. + Same as ``(n_iter_ * n_samples + 1)``. loss_function_ : concrete ``LossFunction`` From b831a210597f205b1b15c5591e6a5804bf6c1194 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 5 Aug 2022 09:47:54 -0400 Subject: [PATCH 026/229] DOC Remove deprecated multichannel parameter in example (#24116) --- doc/tutorial/statistical_inference/unsupervised_learning.rst | 2 +- examples/cluster/plot_coin_segmentation.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index c893ef5b8e010..909b6f4ab1526 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -156,7 +156,7 @@ also referred to as connected components) when clustering an image. >>> from skimage.transform import rescale >>> rescaled_coins = rescale( ... gaussian_filter(coins(), sigma=2), - ... 
0.2, mode='reflect', anti_aliasing=False, multichannel=False + ... 0.2, mode='reflect', anti_aliasing=False ... ) >>> X = np.reshape(rescaled_coins, (-1, 1)) diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py index 229ca182c1e65..bec68d1221646 100644 --- a/examples/cluster/plot_coin_segmentation.py +++ b/examples/cluster/plot_coin_segmentation.py @@ -44,9 +44,7 @@ # Applying a Gaussian filter for smoothing prior to down-scaling # reduces aliasing artifacts. smoothened_coins = gaussian_filter(orig_coins, sigma=2) -rescaled_coins = rescale( - smoothened_coins, 0.2, mode="reflect", anti_aliasing=False, multichannel=False -) +rescaled_coins = rescale(smoothened_coins, 0.2, mode="reflect", anti_aliasing=False) # Convert the image into a graph with the value of the gradient on the # edges. From 45d6384b3959ce65e43d4f9eafaf2a10ca4230a6 Mon Sep 17 00:00:00 2001 From: Meekail Zain <34613774+Micky774@users.noreply.github.com> Date: Fri, 5 Aug 2022 10:02:34 -0400 Subject: [PATCH 027/229] MAINT Minor refactor to de-indent majority of `_assert_all_finite` (#24118) --- sklearn/utils/validation.py | 93 +++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 6bfe99fdaffdc..f491a25e1e620 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -99,56 +99,59 @@ def _assert_all_finite( if _get_config()["assume_finite"]: return X = np.asanyarray(X) + + # for object dtype data, we only check for NaNs (GH-13254) + if X.dtype == np.dtype("object") and not allow_nan: + if _object_dtype_isnan(X).any(): + raise ValueError("Input contains NaN") + + # We need only consider float arrays, hence can early return for all else. 
+ if X.dtype.kind not in "fc": + return + # First try an O(n) time, O(1) space solution for the common case that # everything is finite; fall back to O(n) space `np.isinf/isnan` or custom # Cython implementation to prevent false positives and provide a detailed # error message. - is_float = X.dtype.kind in "fc" - if is_float: - with np.errstate(over="ignore"): - first_pass_isfinite = np.isfinite(np.sum(X)) - if first_pass_isfinite: - return - # Cython implementation doesn't support FP16 or complex numbers - use_cython = X.data.contiguous and X.dtype.type in {np.float32, np.float64} - if use_cython: - out = cy_isfinite(X.reshape(-1), allow_nan=allow_nan) - has_nan_error = False if allow_nan else out == FiniteStatus.has_nan - has_inf = out == FiniteStatus.has_infinite + with np.errstate(over="ignore"): + first_pass_isfinite = np.isfinite(np.sum(X)) + if first_pass_isfinite: + return + # Cython implementation doesn't support FP16 or complex numbers + use_cython = X.data.contiguous and X.dtype.type in {np.float32, np.float64} + if use_cython: + out = cy_isfinite(X.reshape(-1), allow_nan=allow_nan) + has_nan_error = False if allow_nan else out == FiniteStatus.has_nan + has_inf = out == FiniteStatus.has_infinite + else: + has_inf = np.isinf(X).any() + has_nan_error = False if allow_nan else np.isnan(X).any() + if has_inf or has_nan_error: + if has_nan_error: + type_err = "NaN" else: - has_inf = np.isinf(X).any() - has_nan_error = False if allow_nan else np.isnan(X).any() - if has_inf or has_nan_error: - if has_nan_error: - type_err = "NaN" - else: - msg_dtype = msg_dtype if msg_dtype is not None else X.dtype - type_err = f"infinity or a value too large for {msg_dtype!r}" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - if estimator_name and input_name == "X" and has_nan_error: - # Improve the error message on how to handle missing values in - # scikit-learn. 
- msg_err += ( - f"\n{estimator_name} does not accept missing values" - " encoded as NaN natively. For supervised learning, you might want" - " to consider sklearn.ensemble.HistGradientBoostingClassifier and" - " Regressor which accept missing values encoded as NaNs natively." - " Alternatively, it is possible to preprocess the data, for" - " instance by using an imputer transformer in a pipeline or drop" - " samples with missing values. See" - " https://scikit-learn.org/stable/modules/impute.html" - " You can find a list of all estimators that handle NaN values" - " at the following page:" - " https://scikit-learn.org/stable/modules/impute.html" - "#estimators-that-handle-nan-values" - ) - raise ValueError(msg_err) - - # for object dtype data, we only check for NaNs (GH-13254) - elif X.dtype == np.dtype("object") and not allow_nan: - if _object_dtype_isnan(X).any(): - raise ValueError("Input contains NaN") + msg_dtype = msg_dtype if msg_dtype is not None else X.dtype + type_err = f"infinity or a value too large for {msg_dtype!r}" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + if estimator_name and input_name == "X" and has_nan_error: + # Improve the error message on how to handle missing values in + # scikit-learn. + msg_err += ( + f"\n{estimator_name} does not accept missing values" + " encoded as NaN natively. For supervised learning, you might want" + " to consider sklearn.ensemble.HistGradientBoostingClassifier and" + " Regressor which accept missing values encoded as NaNs natively." + " Alternatively, it is possible to preprocess the data, for" + " instance by using an imputer transformer in a pipeline or drop" + " samples with missing values. 
See" + " https://scikit-learn.org/stable/modules/impute.html" + " You can find a list of all estimators that handle NaN values" + " at the following page:" + " https://scikit-learn.org/stable/modules/impute.html" + "#estimators-that-handle-nan-values" + ) + raise ValueError(msg_err) def assert_all_finite( From ed05c40948c43c473a804925d4122f9f10a8fa20 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 5 Aug 2022 10:27:19 -0400 Subject: [PATCH 028/229] DOC Update MDS init docs in fit (#24070) --- sklearn/manifold/_mds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index a8c5e6d15d16c..e9cc8e9bf1df8 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -539,7 +539,7 @@ def fit(self, X, y=None, init=None): y : Ignored Not used, present for API consistency by convention. - init : ndarray of shape (n_samples,), default=None + init : ndarray of shape (n_samples, n_components), default=None Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly chosen array. From 2ad30dfabc2c7b5f36479cf3874ea02a40423608 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Fri, 5 Aug 2022 10:27:54 -0400 Subject: [PATCH 029/229] MNT Use is_sparse to validate sparse data from pandas (#24074) Co-authored-by: Julien Jerphanion --- sklearn/utils/validation.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index f491a25e1e620..e706ed897f890 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -807,18 +807,22 @@ def check_array( # When all dataframe columns are sparse, convert to a sparse array if hasattr(array, "sparse") and array.ndim > 1: - # DataFrame.sparse only supports `to_coo` - array = array.sparse.to_coo() - if array.dtype == np.dtype("object"): - unique_dtypes = set([dt.subtype.name for dt in array_orig.dtypes]) - if len(unique_dtypes) > 1: - raise ValueError( - "Pandas DataFrame with mixed sparse extension arrays " - "generated a sparse matrix with object dtype which " - "can not be converted to a scipy sparse matrix." - "Sparse extension arrays should all have the same " - "numeric type." - ) + with suppress(ImportError): + from pandas.api.types import is_sparse + + if array.dtypes.apply(is_sparse).all(): + # DataFrame.sparse only supports `to_coo` + array = array.sparse.to_coo() + if array.dtype == np.dtype("object"): + unique_dtypes = set([dt.subtype.name for dt in array_orig.dtypes]) + if len(unique_dtypes) > 1: + raise ValueError( + "Pandas DataFrame with mixed sparse extension arrays " + "generated a sparse matrix with object dtype which " + "can not be converted to a scipy sparse matrix." + "Sparse extension arrays should all have the same " + "numeric type." 
+ ) if sp.issparse(array): _ensure_no_complex_data(array) From e33fd6da854be802afeb640a167d2d35915e5448 Mon Sep 17 00:00:00 2001 From: Ravi Makhija <87270246+ravimakhija@users.noreply.github.com> Date: Sun, 7 Aug 2022 10:20:02 -0400 Subject: [PATCH 030/229] DOC Fix variable name in Adaboost example (#24135) --- examples/ensemble/plot_adaboost_multiclass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index c94cc94959576..fae87b4a42d3d 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -62,11 +62,11 @@ real_test_errors = [] discrete_test_errors = [] -for real_test_predict, discrete_train_predict in zip( +for real_test_predict, discrete_test_predict in zip( bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test) ): real_test_errors.append(1.0 - accuracy_score(real_test_predict, y_test)) - discrete_test_errors.append(1.0 - accuracy_score(discrete_train_predict, y_test)) + discrete_test_errors.append(1.0 - accuracy_score(discrete_test_predict, y_test)) n_trees_discrete = len(bdt_discrete) n_trees_real = len(bdt_real) From 5bfd9715acb1849e338e3090d54bee5030e8dd90 Mon Sep 17 00:00:00 2001 From: Valentin Laurent Date: Mon, 8 Aug 2022 11:30:29 +0200 Subject: [PATCH 031/229] FIX Set n_jobs=None as default for neighbors transformers (#24075) --- doc/whats_new/v1.2.rst | 5 +++++ sklearn/neighbors/_graph.py | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index be8859a7e3e01..bf9198af717a1 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -233,6 +233,11 @@ Changelog instead of failing with a low-level error message at predict-time. :pr:`23874` by :user:`Juan Gomez <2357juan>`. 
+- |Fix| Set `n_jobs=None` by default (instead of `1`) for + :class:`neighbors.KNeighborsTransformer` and + :class:`neighbors.RadiusNeighborsTransformer`. + :pr:`24075` by :user:`Valentin Laurent `. + :mod:`sklearn.svm` .................. diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index 2304abae9da63..a77b835eae856 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -291,7 +291,7 @@ class KNeighborsTransformer( metric_params : dict, default=None Additional keyword arguments for the metric function. - n_jobs : int, default=1 + n_jobs : int, default=None The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores. @@ -358,7 +358,7 @@ def __init__( metric="minkowski", p=2, metric_params=None, - n_jobs=1, + n_jobs=None, ): super(KNeighborsTransformer, self).__init__( n_neighbors=n_neighbors, @@ -515,7 +515,7 @@ class RadiusNeighborsTransformer( metric_params : dict, default=None Additional keyword arguments for the metric function. - n_jobs : int, default=1 + n_jobs : int, default=None The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores. @@ -586,7 +586,7 @@ def __init__( metric="minkowski", p=2, metric_params=None, - n_jobs=1, + n_jobs=None, ): super(RadiusNeighborsTransformer, self).__init__( n_neighbors=None, From 13c9e890f02b38e66c9ca4ce9b0bfb9e99c6e8d2 Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Mon, 8 Aug 2022 11:41:02 -0400 Subject: [PATCH 032/229] DOC Update `validate_parameter_constraints()` docstring. 
(#24130) --- sklearn/utils/_param_validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py index dccde16c799e1..a73ed4042ca15 100644 --- a/sklearn/utils/_param_validation.py +++ b/sklearn/utils/_param_validation.py @@ -37,6 +37,10 @@ def validate_parameter_constraints(parameter_constraints, params, caller_name): - a StrOptions object, representing a set of strings - the string "boolean" - the string "verbose" + - the string "cv_object" + - the string "missing_values" + - a HasMethods object, representing method(s) an object must have + - a Hidden object, representing a constraint not meant to be exposed to the user params : dict A dictionary `param_name: param_value`. The parameters to validate against the From 5ee88addfeafd9bf3ea747e3386d2b86b4468b30 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 9 Aug 2022 00:59:14 -0400 Subject: [PATCH 033/229] DOC Fixes search on webpage (#24128) --- build_tools/github/doc_linux-64_conda.lock | 93 +-- .../doc_min_dependencies_linux-64_conda.lock | 30 +- doc/conf.py | 2 + doc/themes/scikit-learn-modern/search.html | 2 +- .../static/js/searchtools.js | 595 ------------------ 5 files changed, 65 insertions(+), 657 deletions(-) delete mode 100644 doc/themes/scikit-learn-modern/static/js/searchtools.js diff --git a/build_tools/github/doc_linux-64_conda.lock b/build_tools/github/doc_linux-64_conda.lock index c1ccf6437f624..f332749eae7f2 100644 --- a/build_tools/github/doc_linux-64_conda.lock +++ b/build_tools/github/doc_linux-64_conda.lock @@ -1,6 +1,6 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 1b1e977a1b5dcedea55d0a8b53d501d1c1211b70a4419ec48dafe1b2658b4ef8 +# input_hash: eb6f446eaacd58fbff84b6520503d14519011e67e0f6ce3d39a9a2922fb781cb @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.6.15-ha878542_0.tar.bz2#c320890f77fd1d617fa876e0982002c2 @@ -10,9 +10,9 @@ https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77 https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.36.1-hea4e1c9_2.tar.bz2#bd4f2e711b39af170e7ff15163fe87ee -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-10.3.0-he6cfe16_16.tar.bz2#878a30aba0574e69bd920c55f243aa06 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-10.4.0-h74af60c_16.tar.bz2#249e3f4b31c67c726ee599e236a9927b https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.1.0-hdcd56e2_16.tar.bz2#b02605b875559ff99f04351fd5040760 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-10.3.0-he6cfe16_16.tar.bz2#baae55f62968547a3731cb668736f611 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-10.4.0-h74af60c_16.tar.bz2#dfdb4caec8c73c80a6803952e7a403d0 https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.1.0-ha89aaad_16.tar.bz2#6f5ba041a41eb102a1027d9e68731be7 https://conda.anaconda.org/conda-forge/noarch/tzdata-2022a-h191b570_0.tar.bz2#84be5301069417a2221187d2f435e0f7 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 @@ -26,35 +26,36 @@ 
https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.36-hf3e587d_ https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.1.0-h8d9b700_16.tar.bz2#4f05bc9844f7c101e6e147dab3c88d5c https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.6.1-h7f98852_0.tar.bz2#0347ce6a34f8b55b544b141432c6d4c7 -https://conda.anaconda.org/conda-forge/linux-64/aom-3.3.0-h27087fc_1.tar.bz2#fe863d1e92331e69c8f231df5eaf5e16 +https://conda.anaconda.org/conda-forge/linux-64/aom-3.4.0-h27087fc_1.tar.bz2#2c106206f789e598ae86e775c69bd78f https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_0.tar.bz2#ec47e97c8e0b27dcadbebc4d17764548 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.18.1-h7f98852_0.tar.bz2#f26ef8098fab1f719c91eb760d63381a https://conda.anaconda.org/conda-forge/linux-64/charls-2.3.4-h9c3ff4c_0.tar.bz2#c3f85a96a52befc5e41cab1145c8d3c2 +https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.0.0-h166bdaf_1.tar.bz2#e890928299fe7242a108850fc0a5b7fc https://conda.anaconda.org/conda-forge/linux-64/expat-2.4.8-h27087fc_0.tar.bz2#e1b07832504eeba765d648389cc387a9 -https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-nompi_h77c792f_102.tar.bz2#208f18b1d596b50c6a92a12b30ebe31f +https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-nompi_ha7695d1_103.tar.bz2#a56c5033619bdf56a22a1f0a0fd286aa https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h36c2ea0_2.tar.bz2#626e68ae9cc5912d6adb79d318cf962d https://conda.anaconda.org/conda-forge/linux-64/icu-70.1-h27087fc_0.tar.bz2#87473a15119779e021c314249d4b4aed https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 
https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-h7f98852_2.tar.bz2#8e787b08fe19986d99d034b839df2961 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lerc-3.0-h9c3ff4c_0.tar.bz2#7fcefde484980d23f0ec24c11e314d2e +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f https://conda.anaconda.org/conda-forge/linux-64/libaec-1.0.6-h9c3ff4c_0.tar.bz2#c77f5e4e418fa47d699d6afa54c5d444 https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_7.tar.bz2#f82dc1c78bcf73583f2656433ce2933c https://conda.anaconda.org/conda-forge/linux-64/libdb-6.2.32-h9c3ff4c_0.tar.bz2#3f3258d8f841fbac63b36b75bdac1afd -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.12-h166bdaf_0.tar.bz2#d56e3db8fa642fb383f18f5be35eeef2 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.13-h166bdaf_0.tar.bz2#4b5bee2e957570197327d0b20a718891 https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3 https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.16-h516909a_0.tar.bz2#5c0f338a513a2943c659ae619fca9211 https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.20-pthreads_h78a6416_0.tar.bz2#9b6d0781953c9e353faee494336cc229 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.20-pthreads_h78a6416_1.tar.bz2#759c6f385ca4110f5fb185d404d306a3 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f 
-https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-10.3.0-h26c7422_16.tar.bz2#4076c395c7fa53cd708949e4be48154e +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-10.4.0-hde28e3b_16.tar.bz2#2a085c59fe36671022c46187c7d1a560 https://conda.anaconda.org/conda-forge/linux-64/libtool-2.4.6-h9c3ff4c_1008.tar.bz2#16e143a1ed4b4fd169536373957f6fee https://conda.anaconda.org/conda-forge/linux-64/libudev1-249-h166bdaf_4.tar.bz2#dc075ff6fcb46b3d3c7652e543d5f334 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.3-h166bdaf_0.tar.bz2#3d6168ac3560d473e52a7cb836400135 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.3-h166bdaf_2.tar.bz2#99c0160c84e61348aa8eb2949b24e6d3 https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.12-h166bdaf_2.tar.bz2#8302381297332ea50532cf2c67961080 https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.3-h9c3ff4c_1.tar.bz2#fbe97e8fa6f275d7c76a09e795adc3e6 @@ -70,9 +71,9 @@ https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.5-h516909a_1.tar.bz2#33f6 https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae https://conda.anaconda.org/conda-forge/linux-64/zfp-0.5.5-h9c3ff4c_8.tar.bz2#f12900b1e1e0527c0e9a4e922a5de2bf https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.6-h166bdaf_0.tar.bz2#8650e4fb44c4a618e5ab3e1e19607e32 -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-10.3.0-hf2f2afa_16.tar.bz2#a340c8ce9e702836e999984fcabd1b6e +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-10.4.0-h7ee1905_16.tar.bz2#a215160d13969222fc09e5e687b7a455 https://conda.anaconda.org/conda-forge/linux-64/gettext-0.19.8.1-h73d1719_1008.tar.bz2#af49250eca8e139378f8ff0ae9e57251 
-https://conda.anaconda.org/conda-forge/linux-64/libavif-0.10.1-h166bdaf_0.tar.bz2#eb9a725cb66435aaf81f13ce585a64eb +https://conda.anaconda.org/conda-forge/linux-64/libavif-0.10.1-h166bdaf_1.tar.bz2#22e524a34f7289934afbcf3083038fd0 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-15_linux64_openblas.tar.bz2#04eb983975a1be3e57d6d667414cd774 https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_7.tar.bz2#37a460703214d0d1b421e2a47eb5e6d0 https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_7.tar.bz2#785a9296ea478eb78c47593c4da6550f @@ -83,20 +84,20 @@ https://conda.anaconda.org/conda-forge/linux-64/libllvm14-14.0.6-he0ac6c6_0.tar. https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-14.0.4-he0ac6c6_0.tar.bz2#cecc6e3cb66570ffcfb820c637890f54 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.29-haf5c9bc_1.tar.bz2#c01640c8bad562720d6caff0402dbd96 -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.20-pthreads_h320a7e8_0.tar.bz2#4cc467036ee23a4e7dac2d2c53cc7c21 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.30-haf5c9bc_0.tar.bz2#9d3e24b1157af09abe5a2589119c7b1d +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.20-pthreads_h320a7e8_1.tar.bz2#73f5595b7dcab3c3571fe917555b6a4a https://conda.anaconda.org/conda-forge/linux-64/portaudio-19.6.0-h57a0ea0_5.tar.bz2#5469312a373f481c05c380897fd7c923 https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.12-h166bdaf_2.tar.bz2#4533821485cde83ab12ff3d8bda83768 
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h8a70e8d_2.tar.bz2#78c26dbb6e07d95ccc0eab8d4540aa0c +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h8a70e8d_3.tar.bz2#aece73b1c2f00e86cb9e4f16fab91d96 https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.1-h83bc5f7_3.tar.bz2#37baca23e60af4130cfc03e8ab9f8e22 https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_7.tar.bz2#1699c1211d56a23c66047524cd76796e https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.2.0-h7a311fb_0.tar.bz2#5bdd579ae21231f8971eab74bfb18f07 -https://conda.anaconda.org/conda-forge/linux-64/gcc-10.3.0-he2824d0_10.tar.bz2#b1f092f4fdbfff91e1df27aa46efa2bb -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-10.3.0-hc39de41_10.tar.bz2#6e52c54509389c06f88ea74c137f75be -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-10.3.0-h73f4979_16.tar.bz2#0407b066da49b4562ce054c06b40558c -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-10.3.0-hf2f2afa_16.tar.bz2#f825336ce3d4dba77ffd1edc78e7abf9 +https://conda.anaconda.org/conda-forge/linux-64/gcc-10.4.0-hb92f740_10.tar.bz2#7e43adbb6ec5b1127821ad0b92c8469c +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-10.4.0-h9215b83_10.tar.bz2#7dd8894b2482ba8a5dcf2f3495e16cdd +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-10.4.0-h44b2e72_16.tar.bz2#c06d532a3bdcd0e4456ba0ae5db7ae9b +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-10.4.0-h7ee1905_16.tar.bz2#a7b3877023ce1582355874811d601fff https://conda.anaconda.org/conda-forge/linux-64/krb5-1.19.3-h3790be6_0.tar.bz2#7d862b05445123144bec92cb1acc8ef8 https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-15_linux64_openblas.tar.bz2#f45968428e445fd0c6472b561145812a https://conda.anaconda.org/conda-forge/linux-64/libclang13-14.0.6-default_h3a83d3e_0.tar.bz2#cdbd49e0ab5c5a6c522acb8271977d4c @@ -106,10 +107,10 @@ 
https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-15_linux64_openb https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.47.0-h727a467_0.tar.bz2#a22567abfea169ff8048506b1ca9b230 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h753d276_3.tar.bz2#3e868978a04de8bf65a97bb86760f47a https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.10.0-ha56f1ee_2.tar.bz2#6ab4eaa11ff01801cffca0a27489dc04 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-hc85c160_1.tar.bz2#151f9fae3ab50f039c8735e47770aa2d +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h0e0dad5_3.tar.bz2#5627d42c13a9b117ae1701c6e195624f https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.14-h22db469_3.tar.bz2#b6f4a0850ba620030a48b88c25497aaa -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.29-h28c427c_1.tar.bz2#36dbdbf505b131c7e79a3857f3537185 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.1-h4ff8645_0.tar.bz2#6acda9d2a3ea84b58637b8f880bbf29b +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.30-h28c427c_0.tar.bz2#77f98ec0b224fd5ca8e7043e167efb83 +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.2-h4ff8645_0.tar.bz2#2cf5cb4cd116a78e639977eb61ad9987 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-h166bdaf_0.tar.bz2#384e7fcb3cd162ba3e4aed4b687df566 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h166bdaf_0.tar.bz2#637054603bb7594302e3bf83f0a99879 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-h166bdaf_0.tar.bz2#732e22f1741bccea861f5668cf7342a7 @@ -118,11 +119,11 @@ https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_7.tar.bz2# https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.4.2-h166bdaf_0.tar.bz2#d3a922efc75c5f2534372eead96d46be https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d 
https://conda.anaconda.org/conda-forge/linux-64/freetype-2.10.4-h0708190_1.tar.bz2#4a06f2ac2e5bfae7b6b245171c3f07aa -https://conda.anaconda.org/conda-forge/linux-64/gfortran-10.3.0-h18518b4_10.tar.bz2#794676d0d7dd69998cb12654cf37f43d -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-10.3.0-hb09a455_10.tar.bz2#dd9058b625a6edce185db90cf99e3590 +https://conda.anaconda.org/conda-forge/linux-64/gfortran-10.4.0-h0c96582_10.tar.bz2#947e6c3d75456718c7ae2c6d3d19190b +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-10.4.0-h69d5af5_10.tar.bz2#3088dc784ec2911a456f1514958e82e1 https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.72.1-h6239696_0.tar.bz2#a3a99cc33279091262bbc4f5ee7c4571 -https://conda.anaconda.org/conda-forge/linux-64/gxx-10.3.0-he2824d0_10.tar.bz2#121c9d19ecb6cf6e587c2ab8dfb5c73c -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-10.3.0-h2593f52_10.tar.bz2#075de70ba0493c56ed9e9cda930978ce +https://conda.anaconda.org/conda-forge/linux-64/gxx-10.4.0-hb92f740_10.tar.bz2#220d9ecccb6c95d91b2bba613cc6a6bd +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-10.4.0-h6e491c6_10.tar.bz2#f8a225e6190a7f5a14702d0549014cba https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.12-hddcbb42_0.tar.bz2#797117394a4aa588de6d741b06fad80f https://conda.anaconda.org/conda-forge/linux-64/libclang-14.0.6-default_h2e3cab8_0.tar.bz2#eb70548da697e50cefa7ba939d57d001 https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-hf5a7f15_1.tar.bz2#005557d6df00af70e438bcd532ce2304 @@ -130,14 +131,14 @@ https://conda.anaconda.org/conda-forge/linux-64/libcurl-7.83.1-h7bff187_0.tar.bz https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-15_linux64_openblas.tar.bz2#31f21773a784f59962ea542a3d71ad87 https://conda.anaconda.org/conda-forge/linux-64/libpq-14.4-hd77ab85_0.tar.bz2#7024df220bd8680192d4bad4024122d1 
https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.0.31-h9c3ff4c_1.tar.bz2#fc4b6d93da04731db7601f2a1b1dc96a -https://conda.anaconda.org/conda-forge/linux-64/libwebp-1.2.3-h522a892_0.tar.bz2#96e218d06394a4e4d77028cf70162a09 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-1.2.3-h522a892_1.tar.bz2#424fabaabbfb6ec60492d3aba900f513 https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.4.0-hb52868f_1.tar.bz2#b7ad78ad2e9ee155f59e6428406ee824 https://conda.anaconda.org/conda-forge/linux-64/python-3.9.13-h9a8a25e_0_cpython.tar.bz2#69bc307cc4d7396c5fccb26bbcc9c379 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h166bdaf_0.tar.bz2#c9b568bd804cb2903c6be6f5f68182e4 https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.12-py_0.tar.bz2#2489a97287f90176ecdc3ca982b4b0a0 -https://conda.anaconda.org/conda-forge/noarch/attrs-21.4.0-pyhd8ed1ab_0.tar.bz2#f70280205d7044c8b8358c8de3190e5d +https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-15_linux64_openblas.tar.bz2#2b5095be485bdb407ff3134358c3ca9c https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad https://conda.anaconda.org/conda-forge/linux-64/cfitsio-4.1.0-hd9d235c_0.tar.bz2#ebc04a148d7204bb428f8633b89fd3dd @@ -149,7 +150,7 @@ https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2 https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.0-h8e229c2_0.tar.bz2#f314f79031fec74adc9bff50fbaffd89 
https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.4.2-h2a4ca65_0.tar.bz2#a39c981deb5b50e2ea1464ab9d5c0b10 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.5.0-pyhd8ed1ab_0.tar.bz2#db4ffc615663c66a9cc0869ce4d1092b +https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.7.1-pyhd8ed1ab_0.tar.bz2#984db277dfb9ea04a584aea39c6a34e4 https://conda.anaconda.org/conda-forge/linux-64/glib-2.72.1-h6239696_0.tar.bz2#1698b7684d3c6a4d1de2ab946f5b0fb5 https://conda.anaconda.org/conda-forge/noarch/idna-3.3-pyhd8ed1ab_0.tar.bz2#40b50b8b030f5f2f22085c062ed013dd https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 @@ -177,15 +178,15 @@ https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 https://conda.anaconda.org/conda-forge/noarch/wheel-0.37.1-pyhd8ed1ab_0.tar.bz2#1ca02aaf78d9c70d9a81a3bed5752022 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.8.0-pyhd8ed1ab_0.tar.bz2#050b94cf4a8c760656e51d2d44e4632c +https://conda.anaconda.org/conda-forge/noarch/zipp-3.8.1-pyhd8ed1ab_0.tar.bz2#a3508a0c850745b875de88aea4c40cc5 https://conda.anaconda.org/conda-forge/noarch/babel-2.10.3-pyhd8ed1ab_0.tar.bz2#72f1c6d03109d7a70087bc1d029a8eda https://conda.anaconda.org/conda-forge/linux-64/blas-2.115-openblas.tar.bz2#ca9e177657aa07ab306bd1bbcdf80e69 https://conda.anaconda.org/conda-forge/linux-64/certifi-2022.6.15-py39hf3d152e_0.tar.bz2#cf0efee4ef53a6d3ea4dce06ac360f14 https://conda.anaconda.org/conda-forge/linux-64/cffi-1.15.1-py39he91dace_0.tar.bz2#61e961a94c8fd535e4496b17e7452dfe https://conda.anaconda.org/conda-forge/linux-64/compilers-1.4.2-ha770c72_0.tar.bz2#b353fa1271e1a82d37a7d35f4785de13 
-https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.30-py39h5a03fae_0.tar.bz2#78d64530b059de26a60f979e02c9fa3c +https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py39h5a03fae_0.tar.bz2#bb64bc0c1ce34178f21bed95a47576d6 https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py39hb9d737c_0.tar.bz2#26740ffa0bfc09bfee7dbe9d226577c9 -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.18.1-py39hf3d152e_1.tar.bz2#9851752658704495f8adf28f6d2b3cb3 +https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_0.tar.bz2#20f72153a0a168a8591daf4a92f577c0 https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.20.3-hd4edc92_0.tar.bz2#94cb81ffdce328f80c87ac9b01244632 https://conda.anaconda.org/conda-forge/linux-64/importlib-metadata-4.11.4-py39hf3d152e_0.tar.bz2#4c2a0eabf0b8980b2c755646a6f750eb https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39hf939315_0.tar.bz2#e8d1310648c189d6d11a2e13f73da1fe @@ -200,45 +201,45 @@ https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-14.0-h7f54b18_8.tar.b https://conda.anaconda.org/conda-forge/linux-64/pysocks-1.7.1-py39hf3d152e_5.tar.bz2#d34b97a2386932b97c7cb80916a673e7 https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py39hb9d737c_4.tar.bz2#dcc47a3b751508507183d17e569805e5 -https://conda.anaconda.org/conda-forge/linux-64/setuptools-63.2.0-py39hf3d152e_0.tar.bz2#0a487a44f996e39d13cdf2899855c406 +https://conda.anaconda.org/conda-forge/linux-64/setuptools-63.4.2-py39hf3d152e_0.tar.bz2#6cdddf948f909c512833840334a68b57 https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py39hb9d737c_0.tar.bz2#a3c57360af28c0d9956622af99a521cd https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-14.0.0-py39hb9d737c_1.tar.bz2#ef84376736d1e8a814ccb06d1d814e6f 
https://conda.anaconda.org/conda-forge/linux-64/brotlipy-0.7.0-py39hb9d737c_1004.tar.bz2#05a99367d885ec9990f25e74128a8a08 https://conda.anaconda.org/conda-forge/linux-64/cryptography-37.0.4-py39hd97740a_0.tar.bz2#edc3668e7b71657237f94cf25e286478 -https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.7.0-pyhd8ed1ab_0.tar.bz2#3b533fc35efb54900c9e4ab06242f8b5 +https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.8.0-pyhd8ed1ab_0.tar.bz2#b097a86c177b459f6c9a68a077cade0e https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.34.4-py39hb9d737c_0.tar.bz2#7980ace37ccb3399672c3a9840e039ed https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.20.3-hf6a322e_0.tar.bz2#6ea2ce6265c3207876ef2369b7479f08 -https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2022.2.22-py39hfcd4fa2_6.tar.bz2#fb284401002dea344eada09798cd07ec -https://conda.anaconda.org/conda-forge/noarch/imageio-2.19.3-pyhcf75d05_0.tar.bz2#9a5e536d761271c400310ec5dff8c5e1 +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2022.7.31-py39h4fc39c6_1.tar.bz2#5c1c596e0febeff31359a6731faaba95 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.21.1-pyhfa7a67d_0.tar.bz2#634d813a25912ac44a003b3ade316a44 https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 https://conda.anaconda.org/conda-forge/noarch/joblib-1.1.0-pyhd8ed1ab_0.tar.bz2#07d1b5c8cde14d95998fd4767e1e62d2 https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.60.0-pyhd8ed1ab_0.tar.bz2#f769ad93cd67c6eb7e932c255ed7d642 https://conda.anaconda.org/conda-forge/linux-64/pandas-1.4.3-py39h1832856_0.tar.bz2#74e00961703972cf33b44a6fca7c3d51 -https://conda.anaconda.org/conda-forge/noarch/pip-22.1.2-pyhd8ed1ab_0.tar.bz2#d29185c662a424f8bea1103270b85c96 +https://conda.anaconda.org/conda-forge/noarch/pip-22.2.2-pyhd8ed1ab_0.tar.bz2#0b43abe4d3ee93e82742d37def53a836 
https://conda.anaconda.org/conda-forge/noarch/pygments-2.12.0-pyhd8ed1ab_0.tar.bz2#cb27e2ded147e5bcc7eafc1c6d343cb3 https://conda.anaconda.org/conda-forge/linux-64/pytest-7.1.2-py39hf3d152e_0.tar.bz2#a6bcf633d12aabdfc4cb32a09ebc0f31 https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39hd257fcd_1.tar.bz2#c4b698994b2d8d2e659ae02202e6abe4 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.8.1-py39he49c0e8_0.tar.bz2#b1b4cc4216e555168e88d6a2b1914af1 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.9.0-py39h8ba3f38_0.tar.bz2#b098a256777cb9e2605451f183c78768 https://conda.anaconda.org/conda-forge/linux-64/sip-6.6.2-py39h5a03fae_0.tar.bz2#e37704c6be07b8b14ffc1ce912802ce0 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.5.2-py39h700656a_0.tar.bz2#ab1bcd0fd24e375f16d662e4cc783cab +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.5.2-py39h700656a_1.tar.bz2#b8d1b075536dfca33459870dbb824886 https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.2-pyhd8ed1ab_0.tar.bz2#2e4e8be763551f60bbfcc22b650e5d49 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_0.tar.bz2#9df63dea0e4d2163be63b8e40c17ceaf +https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_1.tar.bz2#6fb0628d6195d8b6caa2422d09296399 https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.0.0-pyhd8ed1ab_0.tar.bz2#1d7e241dfaf5475e893d4b824bb71b44 https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py39h5a03fae_0.tar.bz2#1fd9112714d50ee5be3dbf4fd23964dc https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_0.tar.bz2#95286e05a617de9ebfe3246cecbfb72f https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.4-ha5833f6_2.tar.bz2#dd3aa6715b9e9efaf842febf18ce4261 -https://conda.anaconda.org/conda-forge/noarch/tifffile-2022.5.4-pyhd8ed1ab_0.tar.bz2#8c8c68b9d466b8804584abe11a75c396 
+https://conda.anaconda.org/conda-forge/noarch/tifffile-2022.8.3-pyhd8ed1ab_0.tar.bz2#ebecab793338121579e65b3cd832d760 https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py39h18e9c17_0.tar.bz2#5ed8f83afff3b64fa91f7a6af8d7ff04 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.3-py39h1832856_0.tar.bz2#5d638481c48c57fdb986490a4a6983fc https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.2-pyhd8ed1ab_0.tar.bz2#fe2303dc8f1febeb82d927ce8ad153ed https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.2-py39hd257fcd_0.tar.bz2#bd7cdadf70e34a19333c3aacc40206e8 -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.10-pyhd8ed1ab_0.tar.bz2#14f22c5b9cfd0d93c2806faaa3fe6dec -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.5.2-py39hf3d152e_0.tar.bz2#d65d073d186977a2a9a9d5a68b2b77ef +https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.5.2-py39hf3d152e_1.tar.bz2#7a3ff2d1e66e674631a65ea32d5cdbbd https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_0.tar.bz2#70d6e72856de9551f83ae0f2de689a7a https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.2-hd8ed1ab_0.tar.bz2#e56b6a19f4b717eca7c68ad78196b075 -https://conda.anaconda.org/conda-forge/noarch/sphinx-5.0.2-pyh6c4a22f_0.tar.bz2#d4eaa1f50733a377480ce1d5aac556c7 +https://conda.anaconda.org/conda-forge/noarch/sphinx-5.1.1-pyhd8ed1ab_1.tar.bz2#cd1129e88f6278787212624e1b7a8001 https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.4.0-pyhd8ed1ab_1.tar.bz2#0aac89c61a466b0f9c4fd0ec44d81f1d -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.10.1-pyhd8ed1ab_0.tar.bz2#4918585fe5e5341740f7e63c61743efb 
+https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.11.0-pyhd8ed1ab_0.tar.bz2#9fcb2988f0d82b9af2131ef4b4567240 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/58/ed/59df64b8400caf736f38bd3725ab9b1d9e50874f629980973aea090c1a8b/sphinxext_opengraph-0.6.3-py3-none-any.whl#md5=None +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/58/ed/59df64b8400caf736f38bd3725ab9b1d9e50874f629980973aea090c1a8b/sphinxext_opengraph-0.6.3-py3-none-any.whl#sha256=bf76017c105856b07edea6caf4942b6ae9bb168585dccfd6dbdb6e4161f6b03a diff --git a/build_tools/github/doc_min_dependencies_linux-64_conda.lock b/build_tools/github/doc_min_dependencies_linux-64_conda.lock index 10d531bc9005d..16a4107455307 100644 --- a/build_tools/github/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/github/doc_min_dependencies_linux-64_conda.lock @@ -1,6 +1,6 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: a6aaffac15d19e8ed6fc40eddb398e18e9252fb737bf8c60f99f93e7d41ac5ce +# input_hash: d7c5b73a8e5033accbbf23734674ee617f6e77fbc98fe96e654c88b245458829 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.6.15-ha878542_0.tar.bz2#c320890f77fd1d617fa876e0982002c2 @@ -23,12 +23,12 @@ https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-7.5.0-habd7529 https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h36c2ea0_2.tar.bz2#626e68ae9cc5912d6adb79d318cf962d https://conda.anaconda.org/conda-forge/linux-64/icu-64.2-he1b5a44_1.tar.bz2#8e881214a23508f1541eb7a3135d6fcb https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 -https://conda.anaconda.org/conda-forge/linux-64/lerc-3.0-h9c3ff4c_0.tar.bz2#7fcefde484980d23f0ec24c11e314d2e -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.12-h166bdaf_0.tar.bz2#d56e3db8fa642fb383f18f5be35eeef2 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.13-h166bdaf_0.tar.bz2#4b5bee2e957570197327d0b20a718891 https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1007.tar.bz2#11389072d7d6036fd811c3d9460475cd https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.16-h516909a_0.tar.bz2#5c0f338a513a2943c659ae619fca9211 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.3-h166bdaf_0.tar.bz2#3d6168ac3560d473e52a7cb836400135 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.3-h166bdaf_2.tar.bz2#99c0160c84e61348aa8eb2949b24e6d3 
https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-0.10.0-he1b5a44_0.tar.bz2#78ccac2098edcd3673af2ceb3e95f932 https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.12-h166bdaf_2.tar.bz2#8302381297332ea50532cf2c67961080 https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.3-h9c3ff4c_1.tar.bz2#fbe97e8fa6f275d7c76a09e795adc3e6 @@ -51,28 +51,28 @@ https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-14.0.4-he0ac6c6_0.ta https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.12-h166bdaf_2.tar.bz2#4533821485cde83ab12ff3d8bda83768 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h8a70e8d_2.tar.bz2#78c26dbb6e07d95ccc0eab8d4540aa0c +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h8a70e8d_3.tar.bz2#aece73b1c2f00e86cb9e4f16fab91d96 https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.1.1-h516909a_0.tar.bz2#d98aa4948ec35f52907e2d6152e2b255 https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-7.5.0-h78c8a43_33.tar.bz2#b2879010fb369f4012040f7a27657cd8 https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-7.5.0-h555fc39_33.tar.bz2#5cf979793d2c5130a012cb6480867adc https://conda.anaconda.org/conda-forge/linux-64/libclang-9.0.1-default_hb4e5071_5.tar.bz2#9dde69aa2a8ecd575a16e44987bdc9f7 https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz2#d5a09a9e981849b751cb75656b7302a0 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h753d276_3.tar.bz2#3e868978a04de8bf65a97bb86760f47a -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-hc85c160_1.tar.bz2#151f9fae3ab50f039c8735e47770aa2d +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h0e0dad5_3.tar.bz2#5627d42c13a9b117ae1701c6e195624f 
https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.1-h4ff8645_0.tar.bz2#6acda9d2a3ea84b58637b8f880bbf29b +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.2-h4ff8645_0.tar.bz2#2cf5cb4cd116a78e639977eb61ad9987 https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da https://conda.anaconda.org/conda-forge/linux-64/freetype-2.10.4-h0708190_1.tar.bz2#4a06f2ac2e5bfae7b6b245171c3f07aa https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.12-hddcbb42_0.tar.bz2#797117394a4aa588de6d741b06fad80f https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be -https://conda.anaconda.org/conda-forge/linux-64/libwebp-1.2.3-h522a892_0.tar.bz2#96e218d06394a4e4d77028cf70162a09 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-1.2.3-h522a892_1.tar.bz2#424fabaabbfb6ec60492d3aba900f513 https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.4.0-hb52868f_1.tar.bz2#b7ad78ad2e9ee155f59e6428406ee824 https://conda.anaconda.org/conda-forge/linux-64/python-3.8.6-h852b56e_0_cpython.tar.bz2#dd65401dfb61ac030edc0dc4d15c2c51 https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.12-py_0.tar.bz2#2489a97287f90176ecdc3ca982b4b0a0 -https://conda.anaconda.org/conda-forge/noarch/attrs-21.4.0-pyhd8ed1ab_0.tar.bz2#f70280205d7044c8b8358c8de3190e5d +https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f 
https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.0-pyhd8ed1ab_0.tar.bz2#abc0453b6e7bfbb87d275d58e333fc98 https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.1.0-pyhd8ed1ab_0.tar.bz2#f7551a8a008dfad2b7ac9662dd124614 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.5-pyhd8ed1ab_0.tar.bz2#c267da48ce208905d7d976d49dfd9433 @@ -80,7 +80,7 @@ https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.0-h8e229c2_0.tar.bz2#f314f79031fec74adc9bff50fbaffd89 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.5.0-pyhd8ed1ab_0.tar.bz2#db4ffc615663c66a9cc0869ce4d1092b +https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.7.1-pyhd8ed1ab_0.tar.bz2#984db277dfb9ea04a584aea39c6a34e4 https://conda.anaconda.org/conda-forge/linux-64/glib-2.66.3-h58526e2_0.tar.bz2#62c2e5c84f6cdc7ded2307ef9c30dc8c https://conda.anaconda.org/conda-forge/noarch/idna-3.3-pyhd8ed1ab_0.tar.bz2#40b50b8b030f5f2f22085c062ed013dd https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 @@ -131,15 +131,15 @@ https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py38h0a891b7_0.tar.b https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed https://conda.anaconda.org/conda-forge/linux-64/brotlipy-0.7.0-py38h0a891b7_1004.tar.bz2#9fcaaca218dcfeb8da806d4fd4824aa0 https://conda.anaconda.org/conda-forge/linux-64/cryptography-37.0.4-py38h2b5fc30_0.tar.bz2#28e9acd6f13ed29f27d5550a1cf0554b -https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.7.0-pyhd8ed1ab_0.tar.bz2#3b533fc35efb54900c9e4ab06242f8b5 
+https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.8.0-pyhd8ed1ab_0.tar.bz2#b097a86c177b459f6c9a68a077cade0e https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.19.3-pyhcf75d05_0.tar.bz2#9a5e536d761271c400310ec5dff8c5e1 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.21.1-pyhfa7a67d_0.tar.bz2#634d813a25912ac44a003b3ade316a44 https://conda.anaconda.org/conda-forge/noarch/jinja2-2.11.3-pyhd8ed1ab_2.tar.bz2#bdedf6199eec03402a0c5db1f25e891e https://conda.anaconda.org/conda-forge/noarch/joblib-1.1.0-pyhd8ed1ab_0.tar.bz2#07d1b5c8cde14d95998fd4767e1e62d2 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.1.2-py38h250f245_1.tar.bz2#0ae46309d21c964547792bac48162fc8 https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.60.0-pyhd8ed1ab_0.tar.bz2#f769ad93cd67c6eb7e932c255ed7d642 https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e -https://conda.anaconda.org/conda-forge/noarch/pip-22.1.2-pyhd8ed1ab_0.tar.bz2#d29185c662a424f8bea1103270b85c96 +https://conda.anaconda.org/conda-forge/noarch/pip-22.2.2-pyhd8ed1ab_0.tar.bz2#0b43abe4d3ee93e82742d37def53a836 https://conda.anaconda.org/conda-forge/noarch/pygments-2.12.0-pyhd8ed1ab_0.tar.bz2#cb27e2ded147e5bcc7eafc1c6d343cb3 https://conda.anaconda.org/conda-forge/linux-64/pytest-7.1.2-py38h578d9bd_0.tar.bz2#626d2b8f96c8c3d20198e6bd84d1cfb7 https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 @@ -154,7 +154,7 @@ https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.2-pyhd8ed1ab_0.t https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e 
https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.10-pyhd8ed1ab_0.tar.bz2#14f22c5b9cfd0d93c2806faaa3fe6dec +https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.2-py38_1.tar.bz2#c2b9671a19c01716c37fe0a0e18b0aec https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_0.tar.bz2#70d6e72856de9551f83ae0f2de689a7a https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.2-hd8ed1ab_0.tar.bz2#e56b6a19f4b717eca7c68ad78196b075 @@ -162,4 +162,4 @@ https://conda.anaconda.org/conda-forge/noarch/sphinx-4.0.1-pyh6c4a22f_2.tar.bz2# https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#md5=None +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 diff --git a/doc/conf.py b/doc/conf.py index 799ce4b74dd5c..3676bfbdf2161 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -262,6 +262,8 @@ for old_link in redirects: html_additional_pages[old_link] = "redirects.html" +# Not showing the search summary makes the search page load faster. 
+html_show_search_summary = False # -- Options for LaTeX output ------------------------------------------------ latex_elements = { diff --git a/doc/themes/scikit-learn-modern/search.html b/doc/themes/scikit-learn-modern/search.html index 2d06835afabb6..f36730892a749 100644 --- a/doc/themes/scikit-learn-modern/search.html +++ b/doc/themes/scikit-learn-modern/search.html @@ -4,5 +4,5 @@ - + {% endblock %} diff --git a/doc/themes/scikit-learn-modern/static/js/searchtools.js b/doc/themes/scikit-learn-modern/static/js/searchtools.js deleted file mode 100644 index 0d4ca2328b079..0000000000000 --- a/doc/themes/scikit-learn-modern/static/js/searchtools.js +++ /dev/null @@ -1,595 +0,0 @@ -/* - * searchtools.js - * ~~~~~~~~~~~~~~~~ - * - * Sphinx JavaScript utilities for the full-text search. - * - * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * - * CHANGELOG: - * - Removes ajax call to get context for each result - * - Adjusts Search.query to remove duplicates in search results. - * - Adjusts Scorer to rank objects higher. - * - Adds Search._total_non_object_results to limit the number of search non - * object results. Object results do not perform another GET resquest, so they - * are cheap to display. - */ - -if (!Scorer) { - /** - * Simple result scoring code. - */ - var Scorer = { - // Implement the following function to further tweak the score for each result - // The function takes a result array [filename, title, anchor, descr, score] - // and returns the new score. 
- /* - score: function(result) { - return result[4]; - }, - */ - - // query matches the full name of an object - objNameMatch: 15, - // or matches in the last dotted part of the object name - objPartialMatch: 15, - // Additive scores depending on the priority of the object - objPrio: { - 0: 15, // used to be importantResults - 1: 5, // used to be objectResults - 2: -5 - }, // used to be unimportantResults - // Used when the priority is not in the mapping. - objPrioDefault: 0, - - // query found in title - title: 15, - partialTitle: 7, - // query found in terms - term: 10, - partialTerm: 2 - }; -} - -if (!splitQuery) { - function splitQuery(query) { - return query.split(/\s+/); - } -} - -/** - * Search Module - */ -var Search = { - _index: null, - _queued_query: null, - _pulse_status: -1, - _total_non_object_results: 10, - - htmlToText: function (htmlString) { - var htmlString = htmlString.replace(//g, ""); - var htmlElement = document.createElement("span"); - htmlElement.innerHTML = htmlString; - $(htmlElement) - .find(".headerlink") - .remove(); - docContent = $(htmlElement).find("[role=main]")[0]; - return docContent.textContent || docContent.innerText; - }, - - init: function () { - var params = $.getQueryParameters(); - if (params.q) { - var query = params.q[0]; - $('input[name="q"]')[0].value = query; - this.performSearch(query); - } - }, - - loadIndex: function (url) { - $.ajax({ - type: "GET", - url: url, - data: null, - dataType: "script", - cache: true, - complete: function (jqxhr, textstatus) { - if (textstatus != "success") { - document.getElementById("searchindexloader").src = url; - } - } - }); - }, - - setIndex: function (index) { - var q; - this._index = index; - if ((q = this._queued_query) !== null) { - this._queued_query = null; - Search.query(q); - } - }, - - hasIndex: function () { - return this._index !== null; - }, - - deferQuery: function (query) { - this._queued_query = query; - }, - - stopPulse: function () { - this._pulse_status = 0; - }, 
- - startPulse: function () { - if (this._pulse_status >= 0) return; - function pulse() { - var i; - Search._pulse_status = (Search._pulse_status + 1) % 4; - var dotString = ""; - for (i = 0; i < Search._pulse_status; i++) dotString += "."; - Search.dots.text(dotString); - if (Search._pulse_status > -1) window.setTimeout(pulse, 500); - } - pulse(); - }, - - /** - * perform a search for something (or wait until index is loaded) - */ - performSearch: function (query) { - // create the required interface elements - this.out = $("#search-results"); - this.title = $("

    " + _("Searching") + "

    ").appendTo(this.out); - this.dots = $("").appendTo(this.title); - this.status = $('

     

    ').appendTo(this.out); - this.output = $('